# Builds the size-benchmark binary for the PR and for its merge base, then
# posts (or updates) a single PR comment containing the size comparison.
name: Binary Size

on:
  pull_request:

jobs:
  binary-size:
    runs-on: ubuntu-latest
    # Once any permission is listed, every unlisted one is set to "none",
    # so the read access needed by actions/checkout is spelled out explicitly.
    permissions:
      contents: read
      pull-requests: write
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0 # need full history to check out base ref

      - name: Install nightly + rust-src
        run: rustup toolchain install nightly --component rust-src

      - uses: Swatinem/rust-cache@f13886b937689c021905a6b90929199931d60db1 # v2.8.1
        with:
          prefix-key: v0-rust-binary-size
          cache-targets: true
          cache-on-failure: true
          workspaces: ". -> target"

      - name: Compare binary size
        run: |
          BASE=$(git merge-base origin/main HEAD)
          bash size-benchmark/compare-size.sh \
            --base "$BASE" \
            --head ${{ github.sha }} \
            --output size-report.md

      - name: Post PR comment
        # Pull requests from forks get a read-only token, so commenting would
        # fail the job; the report is still available in the step log above.
        if: github.event.pull_request.head.repo.full_name == github.repository
        uses: actions/github-script@d746ffe35508b1917358783b479e04febd2b8f71 # v9.0.0
        with:
          script: |
            const fs = require('fs');
            const body = fs.readFileSync('size-report.md', 'utf8');
            // Hidden HTML comment that identifies the comment owned by this
            // workflow. It must be non-empty: `includes('')` is true for every
            // string, which would make us overwrite an unrelated comment.
            const marker = '<!-- size-benchmark-report -->';
            // Paginate so the existing report is found even on long threads
            // (a single listComments call only returns the first page).
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
              per_page: 100,
            });
            // `body` can be null/undefined for some comments; guard the lookup.
            const existing = comments.find(c => c.body?.includes(marker));
            const fullBody = marker + '\n' + body;
            if (existing) {
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: existing.id,
                body: fullBody,
              });
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body: fullBody,
              });
            }
/// Placeholder symbol used to verify the size-benchmark tooling; it always
/// yields the constant `42`.
///
/// # Safety
/// There are no preconditions: calling this function is always sound.
#[no_mangle]
pub unsafe extern "C" fn ddog_ddsketch_dummy_size_bench() -> u64 {
    const DUMMY_RESULT: u64 = 42;
    DUMMY_RESULT
}
#!/usr/bin/env bash
# Build the size-benchmark binary with aggressive size optimizations and print
# the size of the resulting binary.
#
# On Linux → builds for {host-arch}-unknown-linux-gnu
# On macOS → builds for the native Darwin target
#
# NOTE(review): the sibling cargo-bloat-optimized.sh targets
# {host-arch}-unknown-linux-musl on Linux. Confirm which triple is intended
# and keep both scripts aligned.
#
# Requires: rustup with the nightly toolchain (the build relies on -Z flags
# and -Z build-std).
#
# Environment:
#   WORKSPACE_ROOT    workspace to build (default: parent of this script's dir)
#   CARGO_TARGET_DIR  cargo target dir   (default: $WORKSPACE_ROOT/target)
#
# Usage:  ./size-benchmark/build-size-optimized.sh [extra cargo args]
# Output: binary size in bytes on stdout (last line, bare integer)

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
WORKSPACE_ROOT="${WORKSPACE_ROOT:-$(cd "$SCRIPT_DIR/.." && pwd)}"

# `uname -m` reports arm64 on Apple Silicon; Rust triples call it aarch64.
ARCH="$(uname -m | sed 's/arm64/aarch64/')"
OS="$(uname -s)"

case "$OS" in
  Linux) TARGET="${ARCH}-unknown-linux-gnu" ;;
  Darwin) TARGET="${ARCH}-apple-darwin" ;;
  *) echo "Unsupported OS: $OS" >&2; exit 1 ;;
esac

# Best effort: output is discarded and a failure here is deliberately ignored.
rustup target add "$TARGET" --toolchain nightly >/dev/null 2>&1 || true

# RUSTFLAGS is set only for this one cargo invocation; extra script arguments
# are forwarded to cargo unchanged.
RUSTFLAGS="\
  -Zunstable-options \
  -Cpanic=immediate-abort \
  -Zlocation-detail=none \
  -Zfmt-debug=none \
" \
cargo +nightly build \
  -Z build-std=std,panic_abort \
  -Z build-std-features= \
  --target "$TARGET" \
  --profile release-size \
  -p size-benchmark \
  --manifest-path "$WORKSPACE_ROOT/Cargo.toml" \
  "$@"

TARGET_DIR="${CARGO_TARGET_DIR:-$WORKSPACE_ROOT/target}"
BINARY="$TARGET_DIR/$TARGET/release-size/size-benchmark"

# BSD `wc` (macOS) left-pads the count with spaces. Arithmetic expansion
# normalizes it so callers always receive a bare integer.
SIZE_BYTES="$(wc -c < "$BINARY")"
echo "$((SIZE_BYTES))"
+ +use std::fmt::Write as _; +use std::path::Path; +use std::{env, fs}; +use syn::{Item, Visibility}; + +const FFI_DIRS: &[&str] = &[ + "libdd-common-ffi/src", + "libdd-profiling-ffi/src", + "libdd-crashtracker-ffi/src", + "libdd-telemetry-ffi/src", + "libdd-data-pipeline-ffi/src", + "libdd-ddsketch-ffi/src", + "libdd-library-config-ffi/src", + "libdd-log-ffi/src", + "datadog-ffe-ffi/src", + "symbolizer-ffi/src", + "libdd-shared-runtime-ffi/src", +]; + +fn main() { + let manifest = env::var("CARGO_MANIFEST_DIR").unwrap(); + let workspace = Path::new(&manifest).parent().unwrap(); + let current_os = env::var("CARGO_CFG_TARGET_OS").unwrap_or_default(); + + let mut names: Vec = Vec::new(); + + for dir in FFI_DIRS { + println!("cargo:rerun-if-changed=../{dir}"); + let src = workspace.join(dir); + let pattern = format!("{}/**/*.rs", src.display()); + for path in glob::glob(&pattern).unwrap().flatten() { + let Ok(source) = fs::read_to_string(&path) else { + continue; + }; + let Ok(file) = syn::parse_file(&source) else { + continue; + }; + for item in &file.items { + let Item::Fn(f) = item else { continue }; + if !matches!(f.vis, Visibility::Public(_)) { + continue; + } + let Some(abi) = &f.sig.abi else { continue }; + if !matches!(&abi.name, Some(n) if n.value() == "C") { + continue; + } + if !f.attrs.iter().any(|a| a.path().is_ident("no_mangle")) { + continue; + } + // Skip items gated to windows on non-windows builds + let is_windows_only = f.attrs.iter().any(|a| { + if !a.path().is_ident("cfg") { + return false; + } + let Ok(list) = a.meta.require_list() else { + return false; + }; + list.tokens.to_string().contains("windows") + }); + if is_windows_only && current_os != "windows" { + continue; + } + names.push(f.sig.ident.to_string()); + } + } + } + + names.sort(); + names.dedup(); + + let mut out = String::new(); + writeln!( + out, + "// Auto-generated by size-benchmark/build.rs — DO NOT EDIT" + ) + .unwrap(); + writeln!(out, "extern \"C\" {{").unwrap(); + for name in 
&names { + writeln!(out, " fn {name}();").unwrap(); + } + writeln!(out, "}}").unwrap(); + writeln!(out).unwrap(); + writeln!(out, "#[used]").unwrap(); + writeln!(out, "static FPTRS: &[unsafe extern \"C\" fn()] = &[").unwrap(); + for name in &names { + writeln!(out, " {name} as _,").unwrap(); + } + writeln!(out, "];").unwrap(); + writeln!(out, "// {} symbols", names.len()).unwrap(); + + let out_dir = env::var("OUT_DIR").unwrap(); + fs::write(Path::new(&out_dir).join("fptrs.rs"), out).unwrap(); +} diff --git a/size-benchmark/cargo-bloat-optimized.sh b/size-benchmark/cargo-bloat-optimized.sh new file mode 100755 index 0000000000..56fac9386b --- /dev/null +++ b/size-benchmark/cargo-bloat-optimized.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +# Build the size-benchmark binary with the same aggressive size optimizations +# that our most critical users apply, so the measured size is representative. +# +# On Linux → builds for {host-arch}-unknown-linux-musl (static, musl libc) +# On macOS → builds for the native Darwin target (no musl available on macOS) +# +# Requires: rustup with nightly toolchain + the resolved target installed. +# On Linux the musl target also needs a musl C toolchain (e.g. musl-tools package). +# +# Usage: ./size-benchmark/build-size-optimized.sh [extra cargo args] +# Output: binary size in bytes on stdout (last line) + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +WORKSPACE_ROOT="$(cd "$SCRIPT_DIR/.." 
# Translate the machine name reported by `uname -m` (`arm64` becomes `aarch64`)
# and read the OS name used to pick the target triple below.
ARCH="$(uname -m | sed 's/arm64/aarch64/')"
OS="$(uname -s)"

# Linux uses the musl triple, macOS the native Darwin triple; any other OS
# aborts with an error on stderr.
case "$OS" in
  Linux) TARGET="${ARCH}-unknown-linux-musl" ;;
  Darwin) TARGET="${ARCH}-apple-darwin" ;;
  *) echo "Unsupported OS: $OS" >&2; exit 1 ;;
esac

# Best effort: stderr is discarded and a failure here is deliberately ignored.
rustup target add "$TARGET" --toolchain nightly 2>/dev/null || true

# RUSTFLAGS is set only for this single cargo invocation. The flag set and
# profile match the ones used by build-size-optimized.sh, but this script runs
# `cargo bloat` instead of `cargo build`. Extra script arguments ("$@") are
# forwarded to cargo unchanged.
RUSTFLAGS="\
  -Zunstable-options \
  -Cpanic=immediate-abort \
  -Zlocation-detail=none \
  -Zfmt-debug=none \
" \
cargo +nightly bloat \
  -Z build-std=std,panic_abort \
  -Z build-std-features= \
  --target "$TARGET" \
  --profile release-size \
  -p size-benchmark \
  --manifest-path "$WORKSPACE_ROOT/Cargo.toml" \
  "$@"
# Remove the temporary worktree at $1 and drop any stale registration for it.
# Best effort: failures are ignored so cleanup never masks the real result.
_remove_size_worktree() {
  local worktree="$1"
  git -C "$REPO_ROOT" worktree remove --force "$worktree" 2>/dev/null || true
  rm -rf "$worktree"
  git -C "$REPO_ROOT" worktree prune 2>/dev/null || true
}

# Build a ref in a worktree placed inside the repo root so that Cargo's
# path-based fingerprints are stable across runs (no /tmp/tmp.xxx/ variance).
#
# Arguments:
#   $1  git ref to build
#   $2  label ("base" / "head"); used for log lines and the worktree name
# Output:
#   stdout  binary size in bytes, or "0" when the ref predates the
#           size-benchmark crate (not an error)
#   fd 3    stderr of the build script (callers open it, e.g. `3>&2`)
# Returns:
#   non-zero when the ref cannot be resolved, the worktree cannot be created,
#   or the build fails.
#
# This function is called inside `$(...)`, where bash clears `errexit` by
# default, so every failure is propagated explicitly. Otherwise a broken build
# would yield an empty size that the caller treats as "0 bytes".
build_ref() {
  local ref="$1"
  local label="$2"
  local short
  short="$(git -C "$REPO_ROOT" rev-parse --short "$ref")" || return 1
  local worktree="$REPO_ROOT/.worktree-size-$label"

  echo "Building $label ($short)…" >&2

  # A previous aborted run may have left the worktree behind, which would make
  # `git worktree add` fail.
  _remove_size_worktree "$worktree"

  local add_output add_status=0
  add_output="$(git -C "$REPO_ROOT" worktree add --detach "$worktree" "$ref" 2>&1)" || add_status=$?
  if [[ -n "$add_output" ]]; then
    printf '%s\n' "$add_output" | sed 's/^/ /' >&2
  fi
  if [[ $add_status -ne 0 ]]; then
    echo "Failed to create worktree for $label ($short)" >&2
    _remove_size_worktree "$worktree"
    return "$add_status"
  fi

  # If this ref predates the size-benchmark crate, return 0 (not an error).
  if ! grep -q '"size-benchmark"' "$worktree/Cargo.toml" 2>/dev/null; then
    echo " (size-benchmark not present on $label, skipping)" >&2
    _remove_size_worktree "$worktree"
    echo "0"
    return
  fi

  # cargo writes to stderr; the byte count is the only stdout line.
  # Always use the script from the current checkout (base may not have it).
  # Override WORKSPACE_ROOT so the script builds the worktree's sources.
  # CARGO_TARGET_DIR is fixed so both refs share the same build cache.
  local build_status=0
  CARGO_TARGET_DIR="$REPO_ROOT/target" \
  WORKSPACE_ROOT="$worktree" \
    bash "$BUILD_SCRIPT" 2>&3 || build_status=$?

  # Clean up regardless of the build outcome, then report that outcome.
  _remove_size_worktree "$worktree"
  return "$build_status"
}