diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index c5765cd..bea9935 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -99,3 +99,27 @@ jobs:
           body_path: /tmp/release_notes.md
           draft: false
           prerelease: false
+
+  publish-pio:  # Publishes the library package to the PlatformIO Registry.
+    needs: release  # Starts only after the `release` job has completed successfully.
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install PlatformIO
+        run: pip install platformio
+      - name: Publish to PlatformIO Registry
+        env:
+          PLATFORMIO_AUTH_TOKEN: ${{ secrets.PLATFORMIO_AUTH_TOKEN }}  # Registry credential supplied via GitHub Actions secrets.
+        run: pio pkg publish . --no-interactive  # `.` is the checked-out workspace root; no prompts in CI.
+
+  publish-espressif:  # Uploads the component to the Espressif Component Registry.
+    needs: release  # Starts only after the `release` job has completed successfully.
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install compote (ESP Component Manager)
+        run: pip install idf-component-manager  # Provides the `compote` CLI used by the next step.
+      - name: Publish to Espressif Component Registry
+        env:
+          IDF_COMPONENT_API_TOKEN: ${{ secrets.IDF_COMPONENT_API_TOKEN }}  # Registry credential supplied via GitHub Actions secrets.
+        run: compote component upload --name fr_math --namespace deftio  # Uploads under namespace `deftio`, component name `fr_math`.
diff --git a/.gitignore b/.gitignore
index 9be713d..10a7a12 100644
--- a/.gitignore
+++ b/.gitignore
@@ -56,8 +56,8 @@ htmlcov/
 .idea/
 *.sublime-*
 
-# Claude Code local files
-CLAUDE.local.md
+# Claude (Anthropic) project-local files — not part of the library
+CLAUDE*.md
 .claude/
 
 # OS files
diff --git a/README.md b/README.md
index 42982fc..940bbf8 100644
--- a/README.md
+++ b/README.md
@@ -1,98 +1,104 @@
 [![License](https://img.shields.io/badge/License-BSD%202--Clause-blue.svg)](https://opensource.org/licenses/BSD-2-Clause)
 [![CI](https://github.com/deftio/fr_math/actions/workflows/ci.yml/badge.svg)](https://github.com/deftio/fr_math/actions/workflows/ci.yml)
-[![Coverage](https://img.shields.io/badge/coverage-98%25-brightgreen.svg)](#building-and-testing)
+[![Coverage](https://img.shields.io/badge/coverage-96%25-brightgreen.svg)](#building-and-testing)
 [![Docs](https://img.shields.io/badge/docs-online-blue.svg)](https://deftio.github.io/fr_math/)
-[![Version](https://img.shields.io/badge/version-2.0.7-blue.svg)](release_notes.md)  
-  
+[![Version](https://img.shields.io/badge/version-2.0.8-blue.svg)](release_notes.md)
+
 [![PlatformIO](https://img.shields.io/badge/PlatformIO-library-teal.svg)](https://registry.platformio.org/libraries/deftio/fr_math)
 [![Arduino](https://img.shields.io/badge/Arduino-library-teal.svg)](https://github.com/deftio/fr_math)
 [![ESP Component](https://img.shields.io/badge/ESP--IDF-component-teal.svg)](https://components.espressif.com/components/deftio/fr_math)
 
-
 # FR_Math: A C Language Fixed-Point Math Library for Embedded Systems
 
-FR_Math is a compact, integer-only fixed-point math library built for
-systems where floating point is too slow, too big, or unavailable. Designed for embedded targets ranging from
-legacy 16 MHz 68k processors to modern Cortex-M and RISC-V cores, it
-provides a full suite of math primitives — trigonometry, logarithms,
-roots, transforms, and signal generators — while remaining
-deterministic, portable, and small. Unlike traditional fixed-point
-libraries, FR_Math lets the caller choose the binary point per
-operation, trading precision and range explicitly instead of locking
-into a single format. Pure C (C99/C11/C17) with an optional C++
-2D-transform wrapper. Compiles under Arduino. Zero dependencies
-beyond `<stdint.h>`.
+See: **[Documentation & Guide](https://deftio.github.io/fr_math/)** — for the API reference, examples, a fixed-point primer, and build instructions.  
+
+ 
+**FR_Math** is a compact, integer-only fixed-point math library built for systems where floating point is too slow, too big, or unavailable. Designed for embedded targets ranging from legacy 16 MHz 68k processors to modern Cortex-M and RISC-V cores, it provides a full suite of math primitives — trigonometry, logarithms, roots, transforms, and signal generators — while remaining deterministic, portable, and small.
+ 
+Unlike most fixed-point libraries, FR_Math lets the caller choose the binary point (radix) per operation, trading precision and range explicitly instead of locking into a single format. Pure C with C++ wrappers — compiles cleanly under C99, C11, C17, C++11 and later.
+Compiles under Arduino, PlatformIO, Espressif, and many older embedded targets.  
+Zero dependencies beyond `<stdint.h>`.
+
 
 ### Measured accuracy
 
 Errors below are measured at Q16.16 (s15.16). All functions accept any
 radix — Q16.16 is just the reference point for the table.
-Percent errors skip expected values near zero (|expected| < 0.01).
 
 At other radixes (3-bit, 24-bit, etc.) accuracy will differ due to the
-number of fractional bits available. All functions support radix 0 to 30.
-
-<!-- ACCURACY_TABLE_START -->
-| Function | Max err (%) | Avg err (%) | Note |
-|---|---:|---:|---|
-| sin / cos | 0.7169 | 0.0100 | 65536-pt sweep + specials |
-| tan | 0.7118 | 0.0162 | 65536-pt sweep (skip poles) |
-| asin / acos | 0.7025 | 0.0105 | 65536-pt; sqrt approx near boundary |
-| atan2 | 0.4953 | 0.0268 | 65536x5 radii; asin/acos+hypot_fast8 |
-| atan | 0.2985 | 0.0159 | 20001-pt sweep [-10,10]; via FR_atan2 |
-| sqrt | 0.0003 | 0.0000 | Round-to-nearest |
-| log2 | 0.2479 | 0.0045 | 65-entry mantissa table |
-| pow2 | 0.1373 | 0.0057 | 65-entry fraction table |
-| ln, log10 | 0.0015 | 0.0004 | Via FR_MULK28 from log2 |
-| exp | 0.0719 | 0.0051 | FR_MULK28 + FR_pow2 |
-| exp_fast | 0.0719 | 0.0064 | Shift-only scaling |
-| pow10 | 0.1163 | 0.0075 | FR_MULK28 + FR_pow2 |
-| pow10_fast | 0.1163 | 0.0100 | Shift-only scaling |
-| hypot (exact) | 0.0001 | 0.0000 | 64-bit intermediate |
-| hypot_fast8 (8-seg) | 0.0977 | 0.0508 | Shift-only, no multiply |
+number of fractional bits available.
+
+<!-- ACCURACY_TABLE_START -->
+| Function | Max err (%)*| Avg err (%) | Note |
+|---|---:|---:|---|
+| sin/cos (BAM) | 0.1526 | 0.0030 | very fast binary angle trig |
+| sin/cos (deg) | 0.1526 | 0.0029 | degree input trig fns |
+| sin/cos (rad) | 0.1828 | 0.0033 | radian (traditional) trig |
+| tan (BAM) | 0.5823 | 0.0008 | binary angle tangent; ±maxint at poles |
+| tan (deg) | 0.5311 | 0.0008 | degree input tangent; saturated at poles |
+| tan (rad) | 0.0386 | 0.0001 | radian (traditional) tangent |
+| asin / acos | 0.7771 | 0.0280 | inverse trig, radian output |
+| atan2 | 0.2564 | 0.0237 | inverse tangent, always safe |
+| atan | 0.2425 | 0.0155 | inverse tangent, accepts up to maxint |
+| sqrt | 0.0000 | 0.0000 | Round-to-nearest |
+| log2 | 0.0116 | 0.0016 | shift/add only for speed |
+| pow2 | 0.0018 | 0.0004 | shift/add only for speed |
+| ln, log10 | 0.0004 | 0.0000 | shift/add only for speed |
+| exp | 0.0003 | 0.0000 | shift/add only for speed |
+| exp_fast | 0.0009 | 0.0001 | Shift-only scaling |
+| pow10 | 0.0005 | 0.0000 | shift/add only for speed |
+| pow10_fast | 0.0022 | 0.0002 | Shift-only scaling |
+| hypot (exact) | 0.0000 | 0.0000 | Uses 64-bit intermediate |
+| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply |
+
+*Relative error; reference clamped to 1% of full-scale output.
 <!-- ACCURACY_TABLE_END -->
-
+ 
 ### What's in the box
 
 | Area | Functions |
-|---|---|
-| Arithmetic | `FR_ADD`, `FR_SUB`, `FR_DIV`, `FR_DIV32`, `FR_MOD`, `FR_FixMuls`, `FR_FixMulSat`, `FR_CHRDX` |
-| Utility | `FR_MIN`, `FR_MAX`, `FR_CLAMP`, `FR_ABS`, `FR_SGN` |
-| Trig (integer deg) | `FR_Sin`, `FR_Cos`, `FR_Tan`, `FR_SinI`, `FR_CosI`, `FR_TanI` |
-| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_sin_deg`, `fr_cos_deg` |
-| Inverse trig | `FR_atan`, `FR_atan2`, `FR_asin`, `FR_acos` |
-| Log / exp | `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`, `FR_EXP_FAST`, `FR_POW10_FAST`, `FR_MULK28` |
-| Roots | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` |
-| Wave generators | `fr_wave_sqr`, `fr_wave_pwm`, `fr_wave_tri`, `fr_wave_saw`, `fr_wave_tri_morph`, `fr_wave_noise` |
-| Envelope | `fr_adsr_init`, `fr_adsr_trigger`, `fr_adsr_release`, `fr_adsr_step` |
-| 2D transforms | `FR_Matrix2D_CPT` (mul, add, sub, det, inv, setrotate, XFormPtI, XFormPtI16) |
-| Formatted output | `FR_printNumD`, `FR_printNumF`, `FR_printNumH`, `FR_numstr` |
-
-### Library size (FR_math.c only, `-Os`)
-
-Compiled object code sizes on select platforms (static test build). Your
-sizes may vary depending on optimization and linker settings. Sizes
-include all code and internal tables; everything is ROMable.
+| --- | --- |
+| Arithmetic | FR_ADD, FR_SUB, FR_DIV, FR_DIV32, FR_MOD, FR_FixMuls, FR_FixMulSat, FR_CHRDX |
+| Utility | FR_MIN, FR_MAX, FR_CLAMP, FR_ABS, FR_SGN |
+| Trig (degree) | fr_sin_deg, fr_cos_deg, fr_tan_deg, FR_SinI, FR_CosI, FR_TanI |
+| Trig (radian/BAM) | fr_sin, fr_cos, fr_tan, fr_sin_bam, fr_cos_bam, fr_tan_bam |
+| Inverse trig | FR_atan, FR_atan2, FR_asin, FR_acos |
+| Log / exp | FR_log2, FR_ln, FR_log10, FR_pow2, FR_EXP, FR_POW10, FR_EXP_FAST, FR_POW10_FAST, FR_MULK28 |
+| Roots | FR_sqrt, FR_hypot, FR_hypot_fast8 |
+| Wave generators | fr_wave_sqr, fr_wave_pwm, fr_wave_tri, fr_wave_saw, fr_wave_tri_morph, fr_wave_noise |
+| Envelope | fr_adsr_init, fr_adsr_trigger, fr_adsr_release, fr_adsr_step |
+| 2D transforms | FR_Matrix2D_CPT (mul, add, sub, det, inv, setrotate, XFormPtI, XFormPtI16) |
+| Formatted output | FR_printNumD, FR_printNumF, FR_printNumH, FR_numstr |
+
+### Compiled library size (FR_math.c only, `-Os`)
+
+.text section sizes, all code + internal tables, ROMable. Sorted 8-bit → 64-bit.
 
 <!-- SIZE_TABLE_START -->
-| Target | Core | Full |
-|--------|-----:|-----:|
-| RP2040 (Cortex-M0+) | 2.6 KB | 4.2 KB |
-| STM32 (Cortex-M4) | 2.6 KB | 4.2 KB |
-| RISC-V 32 (rv32imac) | 3.0 KB | 4.7 KB |
-| ESP32 (Xtensa) | 3.5 KB | 5.2 KB |
-| 68k | 3.5 KB | 5.3 KB |
-| x86-64 (GCC) | 3.5 KB | 5.7 KB |
-| x86-32 | 4.5 KB | 6.8 KB |
-| MSP430 (16-bit) | 5.9 KB | 8.9 KB |
-| 68HC11 | 10.8 KB | 16.0 KB |
-| AVR (ATmega328P) | 7.0 KB | 10.6 KB |
+| Target | Lean | Core | Full |
+| --- | ---:| ---:| ---:|
+| AVR ATmega328P (8-bit) | 9.2 KB | 12.8 KB | 15.4 KB |
+| 68HC11 (8-bit) | 13.3 KB | 18.4 KB | 22.6 KB |
+| MSP430 (16-bit) | 7.8 KB | 10.7 KB | 12.8 KB |
+| Xtensa LX7 (ESP32-S3) | 2.9 KB | 4.2 KB | 5.3 KB |
+| Cortex-M4 (32-bit) | 3.3 KB | 4.4 KB | 5.5 KB |
+| Cortex-M0 (32-bit) | 3.4 KB | 4.5 KB | 5.7 KB |
+| RISC-V rv32 | 4.1 KB | 5.5 KB | 6.8 KB |
+| Xtensa LX106 (ESP8266) | 4.2 KB | 5.8 KB | 7.3 KB |
+| m68k (32-bit) | 4.4 KB | 6.2 KB | 7.8 KB |
+| MIPS32 | 4.7 KB | 6.6 KB | 8.7 KB |
+| x86-32 | 5.3 KB | 7.2 KB | 9.2 KB |
+| RISC-V rv64 | 4.0 KB | 5.5 KB | 6.8 KB |
+| x86-64 (GCC) | 4.6 KB | 6.1 KB | 8.0 KB |
+| AArch64 (ARM64) | 4.8 KB | 6.6 KB | 8.7 KB |
 <!-- SIZE_TABLE_END -->
 
-Core = compiled with `-DFR_CORE_ONLY` (math only, no print, no waves).
-The optional 2D module adds ~1 KB.
-\* MSP430, 68HC11, and AVR are 8/16-bit — every 32-bit operation expands to multiple instructions.
-See [`docker/`](docker/) for the cross-compile setup.
+**Lean** (`-DFR_LEAN -DFR_NO_PRINT`): radian trig, inv trig, log/exp, sqrt.  
+**Core** (`-DFR_CORE_ONLY`): Lean + degree/BAM trig, log10, hypot.  
+**Full** (default): Core + formatted print, wave generators, ADSR envelope.
+Optional C++ 2D module adds ~1 KB.  
+8/16-bit targets expand some 32-bit operations into multiple instructions — hence the larger sizes.  
+See [Building & Testing](docs/building.md) for the full cross-compile setup.  
 
 ### Lean build options
 
@@ -101,10 +107,10 @@ for ROM-constrained targets. Define them before including `FR_math.h`
 (or pass `-D` on the compiler command line):
 
 | Define | What it removes | Typical savings |
-|---|---|---|
-| `FR_CORE_ONLY` | Everything below (print + waves) | ~1.9 KB |
-| `FR_NO_PRINT` | `FR_printNumF`, `FR_printNumD`, `FR_printNumH`, `FR_numstr` | ~1.3 KB |
-| `FR_NO_WAVES` | `fr_wave_*` (6 shapes), `fr_adsr_*` (ADSR envelope), `FR_HZ2BAM_INC` | ~0.6 KB |
+| --- | --- | --- |
+| FR_CORE_ONLY | Everything below (print + waves) | ~1.9 KB |
+| FR_NO_PRINT | FR_printNumF, FR_printNumD, FR_printNumH, FR_numstr | ~1.3 KB |
+| FR_NO_WAVES | fr_wave_* (6 shapes), fr_adsr_* (ADSR envelope), FR_HZ2BAM_INC | ~0.6 KB |
 
 `FR_CORE_ONLY` is a convenience shorthand that defines both
 `FR_NO_PRINT` and `FR_NO_WAVES` in one step.
@@ -129,7 +135,7 @@ make lib       # build static library
 make test      # run all tests (unit, TDD characterization, 2D)
 ```
 
-## Quick taste
+## Example
 
 ```c
 #include "FR_math.h"
@@ -162,18 +168,23 @@ s32 two  = I2FR(2, R);              /* 2.0 → raw 131072              */
  *
  * MixedCase FR_ names are functions — they contain loops, tables, or
  * multi-step algorithms where inlining would waste ROM:
- *   FR_Cos, FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ...
+ *   FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ...
  *
- * lowercase fr_ names are v2 functions (radian trig, wave generators,
- * ADSR envelopes):
- *   fr_sin, fr_cos, fr_tan, fr_wave_tri, fr_adsr_step ...
+ * lowercase fr_ names are v2 functions (degree/radian/BAM trig, wave
+ * generators, ADSR envelopes):
+ *   fr_sin_deg, fr_cos_deg, fr_tan_deg, fr_sin, fr_cos, fr_tan,
+ *   fr_wave_tri, fr_adsr_step ...
+ *
+ * Legacy aliases: FR_Cos, FR_Sin, FR_Tan still work — they are
+ * macros that map to fr_cos_deg, fr_sin_deg, fr_tan_deg.  New code
+ * should use the fr_ names directly.
  *
  * Some macros wrap functions: FR_EXP(x,r) scales x then calls
  * FR_pow2 — one-liner convenience, heavy lifting in the function.
  */
 
 /* ---- Math functions ---- */
-s32 c45   = FR_Cos(45, 0);                /* cos(45°) = 0.7071       */
+s32 c45   = fr_cos_deg(45, 0);            /* cos(45°) = 0.7071       */
 s32 s30   = fr_sin(FR_numstr("0.5236", R), R); /* sin(0.5236 rad)    */
 s32 root2 = FR_sqrt(two, R);              /* sqrt(2)  = 1.4142       */
 s32 angle = FR_atan2(I2FR(1,R), I2FR(1,R), R); /* atan2(1,1) rad     */
@@ -206,23 +217,25 @@ The full docs ship in two forms — pick whichever fits how you read.
 **Terminal / editor (plain markdown):**
 
 - [docs/README.md](docs/README.md) — same content as plain markdown.
-  - [getting-started.md](docs/getting-started.md) | [fixed-point-primer.md](docs/fixed-point-primer.md) | [api-reference.md](docs/api-reference.md)
-  - [examples.md](docs/examples.md) | [building.md](docs/building.md) | [releases.md](docs/releases.md)
+- [getting-started.md](docs/getting-started.md) | [fixed-point-primer.md](docs/fixed-point-primer.md) | [api-reference.md](docs/api-reference.md)
+- [examples.md](docs/examples.md) | [building.md](docs/building.md) | [releases.md](docs/releases.md)
 
 ## History
 
 FR_Math has been in service since 2000, originally built for graphics
 transforms on 16 MHz 68k Palm Pilots. It shipped inside Trumpetsoft's
 *Inkstorm* on PalmOS, then moved forward through ARM, x86, MIPS,
-RISC-V, and various 8/16-bit embedded targets. v2.0.7 is the current
-release with a full test suite, bit-exact numerical specification, and
-CI on every push.
-
+RISC-V, and various 8/16-bit embedded targets.
+The current release has a full test suite, a numerical specification,
+CI on every push, and better documentation.
+ 
 ## License
 
 BSD-2-Clause — see [LICENSE.txt](LICENSE.txt).
 (c) 2000-2026 M. Chatterjee
 
+PRs and suggestions are welcome.  Please be detailed as embedded systems can involve many tradeoffs.
+
 ## For AI coding agents
 
 - [llms.txt](llms.txt) — machine-readable API summary
@@ -230,5 +243,6 @@ BSD-2-Clause — see [LICENSE.txt](LICENSE.txt).
 
 ## Version
 
-2.0.7 — see [release_notes.md](release_notes.md) for the v1 → v2
+See [release_notes.md](release_notes.md) for the v1 → v2
 migration guide, numerical fixes, and new functionality.
+
diff --git a/VERSION b/VERSION
index f1547e6..815e68d 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.0.7
+2.0.8
diff --git a/agents.md b/agents.md
index 2a461f6..30128db 100644
--- a/agents.md
+++ b/agents.md
@@ -14,16 +14,15 @@ Integer-only, zero dependencies, caller-selectable radix (binary point).
 ```
 src/                  Core library (this is what ships)
   FR_math.h           Public API — all macros, function declarations, constants
-  FR_math.c           All function implementations
+  FR_math.c           All function implementations (trig tables inlined)
   FR_defs.h           Type aliases (s8, s16, s32, u8, u16, u32)
-  FR_trig_table.h     Precomputed sine table
   FR_math_2D.h/.cpp   Optional C++ 2D transform class
 
 tests/                Test suite (7 programs, run via `make test`)
 examples/             Arduino .ino sketches + POSIX example
 docs/                 Markdown documentation
 pages/                HTML documentation (mirrors docs/)
-scripts/              Build, release, version sync helpers
+scripts/              Build, release, version sync, size report helpers
 tools/                Coefficient generators (Python, C++)
 dev/                  Development notes and planning (not shipped)
 ```
@@ -32,8 +31,10 @@ dev/                  Development notes and planning (not shipped)
 
 ```bash
 make lib              # compile library objects
-make test             # run all 7 test suites (27+ tests)
-make examples         # build POSIX example
+make test             # run full test suite (99% line coverage)
+make examples         # build example programs
+make size-report      # cross-compile size report (Docker)
+make size-update      # size report + patch doc files
 make clean            # remove build artifacts
 ```
 
@@ -86,9 +87,11 @@ Versioned files (all synced automatically):
 
 1. Bump `FR_MATH_VERSION_HEX` in `src/FR_math.h`
 2. Run `./scripts/sync_version.sh`
-3. Run `./tools/make_release.sh` (full validation gate)
-4. Verify `llms.txt` and `agents.md` are current with any API changes
-5. Commit, tag, push
+3. Run `./scripts/crossbuild_sizes.sh --update` (rebuild size tables)
+4. Run `./scripts/accuracy_report.sh --update` (rebuild accuracy tables)
+5. Run `./tools/make_release.sh` (full validation gate)
+6. Verify `llms.txt` and `agents.md` are current with any API changes
+7. Commit, tag, push
 
 ## Lean build options
 
@@ -96,13 +99,14 @@ Define before including `FR_math.h` to exclude optional subsystems:
 
 | Define | Removes | Savings |
 |---|---|---|
+| `FR_CORE_ONLY` | Print + waves (shorthand for both below) | ~1.9 KB |
 | `FR_NO_PRINT` | `FR_printNumF/D/H`, `FR_numstr` | ~1.3 KB |
 | `FR_NO_WAVES` | `fr_wave_*`, `fr_adsr_*`, `FR_HZ2BAM_INC` | ~0.6 KB |
 
 ## Platform targets
 
 The library compiles on: AVR (Arduino), ARM Cortex-M0/M4, ESP32,
-RISC-V, x86/x64, MSP430, 68k, 8051. Code is 4–8 KB at `-Os` on
+RISC-V, x86/x64, MSP430, m68k, PowerPC, MIPS32, 68HC11. Code is 3–9 KB at `-Os` on
 32-bit targets.
 
 ## Library publishing
diff --git a/compare_lfm/comparison_results.json b/compare_lfm/comparison_results.json
index adf0019..76b25d9 100644
--- a/compare_lfm/comparison_results.json
+++ b/compare_lfm/comparison_results.json
@@ -13,19 +13,19 @@
       "double_reference": "std::sin",
       "sweep": "65536-pt, [-pi, +pi]",
       "speed": {
-        "fr_math_ns_per_call": 2.6,
-        "libfixmath_ns_per_call": 20.7,
-        "fr_math_speedup": 7.94,
+        "fr_math_ns_per_call": 7.4,
+        "libfixmath_ns_per_call": 12.7,
+        "fr_math_speedup": 1.72,
         "faster": "fr_math"
       },
       "accuracy_vs_double": {
         "fr_math": {
-          "max_abs_error": 1.34165039e-04,
-          "mean_abs_error": 4.23947344e-05,
-          "max_error_lsb": 8.8,
-          "mean_error_lsb": 2.8,
-          "max_rel_error_pct": 1.0615,
-          "mean_rel_error_pct": 0.0158
+          "max_abs_error": 7.40851348e-05,
+          "mean_abs_error": 1.88938357e-05,
+          "max_error_lsb": 4.9,
+          "mean_error_lsb": 1.2,
+          "max_rel_error_pct": 0.4816,
+          "mean_rel_error_pct": 0.0081
         },
         "libfixmath": {
           "max_abs_error": 7.74511497e-03,
@@ -43,19 +43,19 @@
       "double_reference": "std::cos",
       "sweep": "65536-pt, [-pi, +pi]",
       "speed": {
-        "fr_math_ns_per_call": 4.8,
-        "libfixmath_ns_per_call": 18.4,
-        "fr_math_speedup": 3.86,
+        "fr_math_ns_per_call": 9.9,
+        "libfixmath_ns_per_call": 13.2,
+        "fr_math_speedup": 1.34,
         "faster": "fr_math"
       },
       "accuracy_vs_double": {
         "fr_math": {
-          "max_abs_error": 1.25349009e-04,
-          "mean_abs_error": 4.65658208e-05,
-          "max_error_lsb": 8.2,
-          "mean_error_lsb": 3.1,
-          "max_rel_error_pct": 0.9018,
-          "mean_rel_error_pct": 0.0161
+          "max_abs_error": 6.76591807e-05,
+          "mean_abs_error": 2.03740409e-05,
+          "max_error_lsb": 4.4,
+          "mean_error_lsb": 1.3,
+          "max_rel_error_pct": 0.3282,
+          "mean_rel_error_pct": 0.0077
         },
         "libfixmath": {
           "max_abs_error": 7.75591931e-03,
@@ -73,19 +73,19 @@
       "double_reference": "std::tan",
       "sweep": "65536-pt, [-1.2, 1.2] rad",
       "speed": {
-        "fr_math_ns_per_call": 6.0,
-        "libfixmath_ns_per_call": 41.4,
-        "fr_math_speedup": 6.89,
+        "fr_math_ns_per_call": 14.1,
+        "libfixmath_ns_per_call": 37.2,
+        "fr_math_speedup": 2.64,
         "faster": "fr_math"
       },
       "accuracy_vs_double": {
         "fr_math": {
-          "max_abs_error": 8.49384425e-04,
-          "mean_abs_error": 1.04510886e-04,
-          "max_error_lsb": 55.7,
-          "mean_error_lsb": 6.8,
-          "max_rel_error_pct": 1.0080,
-          "mean_rel_error_pct": 0.0228
+          "max_abs_error": 5.06554437e-04,
+          "mean_abs_error": 5.84009618e-05,
+          "max_error_lsb": 33.2,
+          "mean_error_lsb": 3.8,
+          "max_rel_error_pct": 0.5850,
+          "mean_rel_error_pct": 0.0122
         },
         "libfixmath": {
           "max_abs_error": 1.82495961e-02,
@@ -104,19 +104,19 @@
       "double_reference": "std::asin",
       "sweep": "65536-pt, [-0.999, 0.999]",
       "speed": {
-        "fr_math_ns_per_call": 11.5,
-        "libfixmath_ns_per_call": 53.7,
-        "fr_math_speedup": 4.67,
+        "fr_math_ns_per_call": 11.9,
+        "libfixmath_ns_per_call": 64.0,
+        "fr_math_speedup": 5.38,
         "faster": "fr_math"
       },
       "accuracy_vs_double": {
         "fr_math": {
-          "max_abs_error": 4.76933520e-04,
-          "mean_abs_error": 4.37641042e-05,
-          "max_error_lsb": 31.3,
-          "mean_error_lsb": 2.9,
-          "max_rel_error_pct": 0.5795,
-          "mean_rel_error_pct": 0.0134
+          "max_abs_error": 3.79872462e-04,
+          "mean_abs_error": 1.04994412e-04,
+          "max_error_lsb": 24.9,
+          "mean_error_lsb": 6.9,
+          "max_rel_error_pct": 1.9776,
+          "mean_rel_error_pct": 0.0477
         },
         "libfixmath": {
           "max_abs_error": 1.01788963e-02,
@@ -134,19 +134,19 @@
       "double_reference": "std::acos",
       "sweep": "65536-pt, [-0.999, 0.999]",
       "speed": {
-        "fr_math_ns_per_call": 8.4,
-        "libfixmath_ns_per_call": 50.4,
-        "fr_math_speedup": 5.97,
+        "fr_math_ns_per_call": 11.1,
+        "libfixmath_ns_per_call": 65.0,
+        "fr_math_speedup": 5.88,
         "faster": "fr_math"
       },
       "accuracy_vs_double": {
         "fr_math": {
-          "max_abs_error": 4.72479065e-04,
-          "mean_abs_error": 4.33857475e-05,
-          "max_error_lsb": 31.0,
-          "mean_error_lsb": 2.8,
-          "max_rel_error_pct": 0.5194,
-          "mean_rel_error_pct": 0.0056
+          "max_abs_error": 3.75418007e-04,
+          "mean_abs_error": 1.00708880e-04,
+          "max_error_lsb": 24.6,
+          "mean_error_lsb": 6.6,
+          "max_rel_error_pct": 0.2724,
+          "mean_rel_error_pct": 0.0093
         },
         "libfixmath": {
           "max_abs_error": 1.01897006e-02,
@@ -164,17 +164,17 @@
       "double_reference": "std::atan",
       "sweep": "65536-pt, [-50, 50]",
       "speed": {
-        "fr_math_ns_per_call": 8.0,
-        "libfixmath_ns_per_call": 11.2,
-        "fr_math_speedup": 1.41,
+        "fr_math_ns_per_call": 10.8,
+        "libfixmath_ns_per_call": 14.8,
+        "fr_math_speedup": 1.36,
         "faster": "fr_math"
       },
       "accuracy_vs_double": {
         "fr_math": {
-          "max_abs_error": 9.57408985e-04,
-          "mean_abs_error": 7.37662492e-05,
-          "max_error_lsb": 62.7,
-          "mean_error_lsb": 4.8,
+          "max_abs_error": 9.14677954e-04,
+          "mean_abs_error": 7.43583969e-05,
+          "max_error_lsb": 59.9,
+          "mean_error_lsb": 4.9,
           "max_rel_error_pct": 0.2149,
           "mean_rel_error_pct": 0.0061
         },
@@ -194,19 +194,19 @@
       "double_reference": "std::atan2",
       "sweep": "65536-pt, 5 radii x 360 deg",
       "speed": {
-        "fr_math_ns_per_call": 15.9,
-        "libfixmath_ns_per_call": 10.5,
+        "fr_math_ns_per_call": 20.8,
+        "libfixmath_ns_per_call": 13.7,
         "fr_math_speedup": 0.66,
         "faster": "libfixmath"
       },
       "accuracy_vs_double": {
         "fr_math": {
-          "max_abs_error": 9.70679332e-04,
-          "mean_abs_error": 2.15170870e-04,
-          "max_error_lsb": 63.6,
-          "mean_error_lsb": 14.1,
+          "max_abs_error": 9.53437855e-04,
+          "mean_abs_error": 1.91371871e-04,
+          "max_error_lsb": 62.5,
+          "mean_error_lsb": 12.5,
           "max_rel_error_pct": 0.4122,
-          "mean_rel_error_pct": 0.0258
+          "mean_rel_error_pct": 0.0239
         },
         "libfixmath": {
           "max_abs_error": 1.01728729e-02,
@@ -225,9 +225,9 @@
       "double_reference": "std::sqrt",
       "sweep": "65536-pt, [0.01, 100]",
       "speed": {
-        "fr_math_ns_per_call": 18.6,
-        "libfixmath_ns_per_call": 19.8,
-        "fr_math_speedup": 1.06,
+        "fr_math_ns_per_call": 24.8,
+        "libfixmath_ns_per_call": 26.2,
+        "fr_math_speedup": 1.05,
         "faster": "fr_math"
       },
       "accuracy_vs_double": {
@@ -255,9 +255,9 @@
       "double_reference": "std::exp",
       "sweep": "65536-pt, [-5, 5]",
       "speed": {
-        "fr_math_ns_per_call": 3.1,
-        "libfixmath_ns_per_call": 67.6,
-        "fr_math_speedup": 22.02,
+        "fr_math_ns_per_call": 4.0,
+        "libfixmath_ns_per_call": 84.6,
+        "fr_math_speedup": 21.04,
         "faster": "fr_math"
       },
       "accuracy_vs_double": {
@@ -285,9 +285,9 @@
       "double_reference": "std::log",
       "sweep": "65536-pt, [0.01, 100]",
       "speed": {
-        "fr_math_ns_per_call": 8.8,
-        "libfixmath_ns_per_call": 479.3,
-        "fr_math_speedup": 54.70,
+        "fr_math_ns_per_call": 11.2,
+        "libfixmath_ns_per_call": 583.3,
+        "fr_math_speedup": 51.87,
         "faster": "fr_math"
       },
       "accuracy_vs_double": {
@@ -315,9 +315,9 @@
       "double_reference": "std::log2",
       "sweep": "65536-pt, [0.01, 100]",
       "speed": {
-        "fr_math_ns_per_call": 8.7,
-        "libfixmath_ns_per_call": 39.4,
-        "fr_math_speedup": 4.55,
+        "fr_math_ns_per_call": 11.3,
+        "libfixmath_ns_per_call": 51.1,
+        "fr_math_speedup": 4.53,
         "faster": "fr_math"
       },
       "accuracy_vs_double": {
@@ -345,8 +345,8 @@
       "double_reference": "double a*b",
       "sweep": "65536-pt, a in [-50,50], b in [-2,2]",
       "speed": {
-        "fr_math_ns_per_call": 0.9,
-        "libfixmath_ns_per_call": 1.2,
+        "fr_math_ns_per_call": 1.2,
+        "libfixmath_ns_per_call": 1.6,
         "fr_math_speedup": 1.33,
         "faster": "fr_math"
       },
@@ -375,9 +375,9 @@
       "double_reference": "double a/b",
       "sweep": "65536-pt, a/b in [-50,50]/[0.5,50]",
       "speed": {
-        "fr_math_ns_per_call": 0.9,
-        "libfixmath_ns_per_call": 5.2,
-        "fr_math_speedup": 5.98,
+        "fr_math_ns_per_call": 1.2,
+        "libfixmath_ns_per_call": 6.9,
+        "fr_math_speedup": 5.96,
         "faster": "fr_math"
       },
       "accuracy_vs_double": {
@@ -406,7 +406,7 @@
       "double_reference": "std::hypot",
       "sweep": "65536-pt, 5 radii x 360 deg",
       "speed": {
-        "fr_math_ns_per_call": 20.0
+        "fr_math_ns_per_call": 26.2
       },
       "accuracy_vs_double": {
         "fr_math": {
@@ -425,7 +425,7 @@
       "double_reference": "std::hypot",
       "sweep": "65536-pt, 5 radii x 360 deg",
       "speed": {
-        "fr_math_ns_per_call": 2.4
+        "fr_math_ns_per_call": 3.2
       },
       "accuracy_vs_double": {
         "fr_math": {
diff --git a/compare_lfm/comparison_summary.md b/compare_lfm/comparison_summary.md
index 9169c50..0f467ad 100644
--- a/compare_lfm/comparison_summary.md
+++ b/compare_lfm/comparison_summary.md
@@ -23,13 +23,13 @@ All errors measured vs IEEE 754 double. Pct errors skip |ref| < 0.01.
 
 | Function | FR max LSB | FR max %% | FR avg %% | lfm max LSB | lfm max %% | lfm avg %% | Winner |
 |----------|----------:|---------:|---------:|----------:|---------:|---------:|--------|
-| sin             |     8.8 |  1.0615 |  0.0158 |     507.6 | 74.5513 |  0.6105 | FR       |
-| cos             |     8.2 |  0.9018 |  0.0161 |     508.3 | 74.4001 |  0.6121 | FR       |
-| tan             |    55.7 |  1.0080 |  0.0228 |    1196.0 |  0.7099 |  0.0410 | FR       |
-| asin            |    31.3 |  0.5795 |  0.0134 |     667.1 | 20.1233 |  2.4452 | FR       |
-| acos            |    31.0 |  0.5194 |  0.0056 |     667.8 | 15.3142 |  0.3475 | FR       |
-| atan            |    62.7 |  0.2149 |  0.0061 |     666.3 | 19.8632 |  0.4571 | FR       |
-| atan2           |    63.6 |  0.4122 |  0.0258 |     666.7 | 20.0045 |  0.9267 | FR       |
+| sin             |     4.9 |  0.4816 |  0.0081 |     507.6 | 74.5513 |  0.6105 | FR       |
+| cos             |     4.4 |  0.3282 |  0.0077 |     508.3 | 74.4001 |  0.6121 | FR       |
+| tan             |    33.2 |  0.5850 |  0.0122 |    1196.0 |  0.7099 |  0.0410 | FR       |
+| asin            |    24.9 |  1.9776 |  0.0477 |     667.1 | 20.1233 |  2.4452 | FR       |
+| acos            |    24.6 |  0.2724 |  0.0093 |     667.8 | 15.3142 |  0.3475 | FR       |
+| atan            |    59.9 |  0.2149 |  0.0061 |     666.3 | 19.8632 |  0.4571 | FR       |
+| atan2           |    62.5 |  0.4122 |  0.0239 |     666.7 | 20.0045 |  0.9267 | FR       |
 | sqrt            |     0.5 |  0.0062 |  0.0001 |       0.5 |  0.0062 |  0.0001 | tie      |
 | exp             |   208.3 |  0.1486 |  0.0078 |     216.3 |  0.0756 |  0.0042 | FR       |
 | ln              |     3.2 |  0.3012 |  0.0006 |       2.2 |  0.0557 |  0.0002 | lfm      |
@@ -43,21 +43,21 @@ All errors measured vs IEEE 754 double. Pct errors skip |ref| < 0.01.
 
 | Function | FR_math | libfixmath | Speedup | Faster |
 |----------|--------:|-----------:|--------:|--------|
-| sin             |    2.6 |       20.7 |   7.94x | FR      |
-| cos             |    4.8 |       18.4 |   3.86x | FR      |
-| tan             |    6.0 |       41.4 |   6.89x | FR      |
-| asin            |   11.5 |       53.7 |   4.67x | FR      |
-| acos            |    8.4 |       50.4 |   5.97x | FR      |
-| atan            |    8.0 |       11.2 |   1.41x | FR      |
-| atan2           |   15.9 |       10.5 |   0.66x | lfm     |
-| sqrt            |   18.6 |       19.8 |   1.06x | FR      |
-| exp             |    3.1 |       67.6 |  22.02x | FR      |
-| ln              |    8.8 |      479.3 |  54.70x | FR      |
-| log2            |    8.7 |       39.4 |   4.55x | FR      |
-| mul             |    0.9 |        1.2 |   1.33x | FR      |
-| div             |    0.9 |        5.2 |   5.98x | FR      |
-| hypot           |   20.0 |        --- |     --- | FR only |
-| hypot_fast8     |    2.4 |        --- |     --- | FR only |
+| sin             |    7.4 |       12.7 |   1.72x | FR      |
+| cos             |    9.9 |       13.2 |   1.34x | FR      |
+| tan             |   14.1 |       37.2 |   2.64x | FR      |
+| asin            |   11.9 |       64.0 |   5.38x | FR      |
+| acos            |   11.1 |       65.0 |   5.88x | FR      |
+| atan            |   10.8 |       14.8 |   1.36x | FR      |
+| atan2           |   20.8 |       13.7 |   0.66x | lfm     |
+| sqrt            |   24.8 |       26.2 |   1.05x | FR      |
+| exp             |    4.0 |       84.6 |  21.04x | FR      |
+| ln              |   11.2 |      583.3 |  51.87x | FR      |
+| log2            |   11.3 |       51.1 |   4.53x | FR      |
+| mul             |    1.2 |        1.6 |   1.33x | FR      |
+| div             |    1.2 |        6.9 |   5.96x | FR      |
+| hypot           |   26.2 |        --- |     --- | FR only |
+| hypot_fast8     |    3.2 |        --- |     --- | FR only |
 
 ### Summary (13 head-to-head functions)
 
diff --git a/dev/FR_math_TODO.md b/dev/FR_math_TODO.md
index 84c2336..7d28a71 100644
--- a/dev/FR_math_TODO.md
+++ b/dev/FR_math_TODO.md
@@ -1,6 +1,8 @@
-# FR_Math TODO 
-[![Build Status](https://travis-ci.org/deftio/fr_math.svg?branch=master)](https://travis-ci.org/deftio/fr_math)
-[![Coverage Status](https://coveralls.io/repos/github/deftio/fr_math/badge.svg?branch=master)](https://coveralls.io/github/deftio/fr_math?branch=master)
+# FR_Math TODO
+
+[![CI](https://github.com/deftio/fr_math/actions/workflows/ci.yml/badge.svg)](https://github.com/deftio/fr_math/actions/workflows/ci.yml)
+
+*Historical scratchpad — not maintained as a roadmap. For current build/test, see repo root `README.md` and `agents.md`.*
 
 (c) M. A. Chatterjee 2000-2016
 
diff --git a/dev/misc/FR_math.c.checkpoint3 b/dev/misc/FR_math.c.checkpoint3
new file mode 100644
index 0000000..45c75c2
--- /dev/null
+++ b/dev/misc/FR_math.c.checkpoint3
@@ -0,0 +1,1705 @@
+/**
+ *
+ *	@file FR_math.c - c implementation file for basic fixed
+ *                              radix math routines
+ *
+ *	@copy Copyright (C) <2001-2026>  <M. A. Chatterjee>
+ *  @author M A Chatterjee <deftio [at] deftio [dot] com>
+ *
+ *  This file contains integer math settable fixed point radix math routines for
+ *  use on systems in which floating point is not desired or unavailable.
+ *
+ *	This software is provided 'as-is', without any express or implied
+ *	warranty. In no event will the authors be held liable for any damages
+ *	arising from the use of this software.
+ *
+ *	Permission is granted to anyone to use this software for any purpose,
+ *	including commercial applications, and to alter it and redistribute it
+ *	freely, subject to the following restrictions:
+ *
+ *	1. The origin of this software must not be misrepresented; you must not
+ *	claim that you wrote the original software. If you use this software
+ *	in a product, please place an acknowledgment in the product documentation.
+ *
+ *	2. Altered source versions must be plainly marked as such, and must not be
+ *	misrepresented as being the original software.
+ *
+ *	3. This notice may not be removed or altered from any source
+ *	distribution.
+ *
+ */
+
+#include "FR_math.h"
+#include "FR_trig_table.h"
+
+#ifndef FR_NO_STDINT
+#include <stdint.h>
+#endif
+
+/*=======================================================
+ * Full-precision radian/degree → BAM conversion helpers
+ *
+ * rad_to_bam_full(r) returns a full s32 BAM value where:
+ *   upper 16 bits = integer BAM (the u16 table index)
+ *   lower 16 bits = sub-BAM fractional part
+ * Input r must already be normalized to radix 16 and reduced to [-pi, pi].
+ *
+ * The shift terms match FR_RAD2BAM (10 terms, ~21-bit accuracy) but are
+ * reordered so intermediate sums stay within s32 for |r| <= pi at r16.
+ */
+static s32 rad_to_bam_full(s32 r)
+{
+    /* 10 terms: 65536/(2*pi) ≈ 10430.37835...
+     * 2^13 + 2^11 + 2^7 + 2^6 - 2 + 0.5 - 0.125 + 2^-8 - 2^-11 - 2^-14
+     * = 10430.378357 (~21-bit accuracy)
+     * Terms reordered: interleave negatives early to keep all intermediate
+     * sums within s32 for |r| <= pi at r16 (max result ≈ 2^31 - 4K). */
+    return (r<<13)-(r<<1)+(r<<11)-(r>>3)+(r<<7)+(r<<6)+(r>>1)+(r>>8)-(r>>11)-(r>>14);
+}
+
+/* deg_to_bam_full(d) — same idea for degrees.
+ * Input d must already be normalized to radix 16 and reduced to [-90, 90).
+ * Returns full s32 BAM (upper 16 = integer BAM, lower 16 = sub-BAM).
+ * 7 terms, ~18-bit accuracy matching FR_DEG2BAM. */
+static s32 deg_to_bam_full(s32 d)
+{
+    return (d<<7)+(d<<6)-(d<<3)-(d<<1)+(d>>5)+(d>>6)-(d>>9);
+}
+
+/* Normalize a fixed-radix value to radix 16. */
+static s32 normalize_to_r16(s32 val, u16 radix)
+{
+    return (radix > 16) ? (val >> (radix - 16))
+         : (radix < 16) ? (val << (16 - radix))
+         : val;
+}
+
+/* Reduce non-negative radian (at r16) to [0, 2*pi] (exactly 2*pi stays 2*pi).
+ * Helper used by range_reduce_rad and the near-pi small-angle paths. */
+static s32 reduce_to_2pi(s32 r)
+{
+    const s32 two_pi = FR_TWO_PI(16);  /* 411775 */
+    if (r > (two_pi << 1))
+        r -= (r / two_pi) * two_pi;
+    else if (r > two_pi)
+        r -= two_pi;
+    return r;
+}
+
+/* Range-reduce radian value (at r16, non-negative) to [-pi, pi].
+ * Caller guarantees r >= 0 (sign is handled externally). */
+static s32 range_reduce_rad(s32 r)
+{
+    r = reduce_to_2pi(r);
+    if (r > FR_PI(16))
+        r -= FR_TWO_PI(16);
+    return r;
+}
+
+/* fr_rad_to_bam — radian (s32 at `radix`) to u16 BAM, rounded to nearest.
+ * Normalizes to r16, folds the sign out, range-reduces, then shift-only multiply. */
+u16 fr_rad_to_bam(s32 rad, u16 radix)
+{
+    s32 r = normalize_to_r16(rad, radix);
+    /* range_reduce_rad expects non-negative input; bam(-x) = -bam(x) mod 65536. */
+    s32 sign = 1;
+    if (r < 0) { r = -r; sign = -1; }
+    r = range_reduce_rad(r);
+    s32 bam_full = rad_to_bam_full(r);
+    if (sign < 0) bam_full = -bam_full;
+    /* Round in u32: near +pi bam_full is within 2^15 of INT32_MAX, so the
+     * signed add of the rounding bias would overflow (undefined behavior). */
+    return (u16)(((u32)bam_full + ((u32)1 << 15)) >> 16);
+}
+
+/* fr_deg_to_bam — overflow-safe degree to u16 BAM conversion.
+ * Normalizes to r16, reduces to [-90, 90) with quadrant offset. */
+u16 fr_deg_to_bam(s32 deg, u16 radix)
+{
+    s32 d = normalize_to_r16(deg, radix);
+
+    /* Reduce to [-180, 180) */
+    if (d >= FR_D360_R16 || d < -FR_D360_R16) {
+        s32 n = d / FR_D360_R16;
+        d -= n * FR_D360_R16;
+    }
+    if (d >=  FR_D180_R16) d -= FR_D360_R16;
+    if (d <  -FR_D180_R16) d += FR_D360_R16;
+
+    /* Reduce to [-90, 90) with BAM quadrant offset */
+    u16 offset = 0;
+    if (d >= FR_D90_R16)      { d -= FR_D180_R16; offset = 32768; }
+    else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = 32768; }
+
+    return (u16)(offset + (u16)((deg_to_bam_full(d) + (1 << 15)) >> 16));
+}
+
+/*=======================================================
+ * BAM-native trig: fr_sin_bam, fr_cos_bam, fr_cos, fr_sin, fr_tan
+ *
+ * Internal model: every angle is reduced to a u16 BAM value. The top 2 bits
+ * select the quadrant, the bottom 14 bits are the in-quadrant position. Odd
+ * quadrants (1, 3) reverse the in-quadrant index so the table is always read
+ * in the same direction.
+ *
+ * The table is a 129-entry SINE quadrant (ascending: 0 at index 0, 32768 at
+ * index 128). After mirroring, small inq → small output (near zero),
+ * which enables a cheap small-angle approximation: sin(θ) ≈ θ for angles
+ * below one table step (~0.7°). This eliminates table quantization error
+ * in the region where it matters most.
+ *
+ * Sign rule: quadrants 2 and 3 negate the result.
+ * Mirror rule: quadrants 1 and 3 flip the in-quadrant position.
+ */
+s32 fr_sin_bam(u16 bam)
+{
+	u32 q   = ((u32)bam >> 14) & 0x3;              /* top 2 bits = quadrant */
+	u32 inq = (u32)bam & (FR_TRIG_QUADRANT - 1);  /* bottom 14 bits        */
+
+	/* Exact cardinal angles */
+	if (inq == 0) {
+		if (q == 0 || q == 2) return 0;            /*   0° or 180° → 0   */
+		if (q == 1) return  FR_TRIG_ONE;           /*  90° →  1.0        */
+		return -FR_TRIG_ONE;                       /* 270° → -1.0        */
+	}
+
+	/* Odd quadrants mirror: read table from the far end */
+	if (q == 1 || q == 3)
+		inq = FR_TRIG_QUADRANT - inq;
+
+	s32 v;
+
+	/* Small-angle approximation: sin(θ) ≈ θ for inq < 128 (one table step).
+	 * θ_rad = inq * (π/2) / 16384.  Output = θ * 65536 = inq * FR_kQ2RAD / 16384.
+	 * Max inq=127: 127 * 102944 / 16384 = 798. Error: θ³/6 < 3e-7 << 1 LSB. */
+	if (inq < FR_TRIG_FRAC_MAX) {
+		v = (s32)(((u32)inq * 102944u + 8192u) >> 14);
+	} else {
+		/* Table lookup with 7-bit interpolation fraction */
+		u32 idx  = inq >> FR_TRIG_FRAC_BITS;
+		u32 frac = inq &  FR_TRIG_FRAC_MASK;
+		s32 lo = (s32)gFR_SIN_TAB_Q[idx];
+		s32 hi = (s32)gFR_SIN_TAB_Q[idx + 1];
+		v = lo + (((hi - lo) * (s32)frac + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS);
+		v <<= 1;   /* u0.15 → s15.16 */
+	}
+
+	return (q >= 2) ? -v : v;
+}
+
+s32 fr_cos_bam(u16 bam)
+{
+	/* cos(x) = sin(x + pi/2) = sin(bam + 16384). u16 wraparound is free. */
+	return fr_sin_bam((u16)(bam + FR_BAM_QUADRANT));
+}
+
+s32 fr_cos(s32 rad, u16 radix)
+{
+	if (rad == 0) return FR_TRIG_ONE;
+	s32 r = normalize_to_r16(rad, radix);
+	if (r < 0) r = -r;
+	r = reduce_to_2pi(r);
+	/* Near π/2 or 3π/2 (cos=0 crossings): cos(π/2+δ) = -sin(δ) ≈ -δ,
+	 * cos(3π/2+δ) = sin(δ) ≈ δ. */
+	s32 delta = r - FR_HALF_PI(16);
+	if (delta >= -256 && delta <= 256)
+		return -delta;
+	delta = r - FR_THREE_HALF_PI(16);
+	if (delta >= -256 && delta <= 256)
+		return delta;
+	return fr_cos_bam(fr_rad_to_bam(rad, radix));
+}
+
+s32 fr_sin(s32 rad, u16 radix)
+{
+	if (rad == 0) return 0;
+	s32 r = normalize_to_r16(rad, radix);
+	s32 sign = 1;
+	if (r < 0) { r = -r; sign = -1; }
+	r = reduce_to_2pi(r);
+	/* Near 0 after reduction: sin(δ) ≈ δ */
+	if (r < 256) {
+		s32 v = r;
+		return (sign < 0) ? -v : v;
+	}
+	/* Near π: sin(π + δ) = -sin(δ) ≈ -δ */
+	s32 delta = r - FR_PI(16);
+	if (delta >= -256 && delta <= 256) {
+		s32 v = -delta;
+		return (sign < 0) ? -v : v;
+	}
+	/* Near 2π: sin(2π - δ) = -sin(δ) ≈ -δ, but δ = 2π - r */
+	delta = FR_TWO_PI(16) - r;
+	if (delta >= 0 && delta < 256) {
+		s32 v = -delta;
+		return (sign < 0) ? -v : v;
+	}
+	/* Main path: reduce to [-π, π], convert to u16 BAM, table lookup */
+	if (r > FR_PI(16)) r -= FR_TWO_PI(16);
+	u16 bam = (u16)((rad_to_bam_full(r) + (1 << 15)) >> 16);
+	s32 v = fr_sin_bam(bam);
+	return (sign < 0) ? -v : v;
+}
+
+/*=======================================================
+ * BAM-native tangent: fr_tan_bam
+ *
+ * Uses a 65-entry octant table (gFR_TAN_TAB_O) for the first octant
+ * [0, 45°) and the reciprocal identity tan(x) = 1/tan(90°-x) for the
+ * second octant [45°, 90°). Result is s15.16 with saturation at the poles.
+ * NOTE(review): exactly 45° takes the reciprocal path, where the lerp's
+ * table[idx + 1] read may fall past the last entry — confirm in FR_trig_table.h.
+ * No 64-bit intermediates. One 32-bit division only in the >=45° path.
+ */
+s32 fr_tan_bam(u16 bam)
+{
+	u32 q   = ((u32)bam >> 14) & 0x3;          /* quadrant (top 2 bits) */
+	u32 inq = (u32)bam & 0x3FFFu;              /* in-quadrant (14 bits) */
+	s32 sign = 1;
+	u32 idx, frac;
+	s32 lo, hi, raw;
+
+	/* Exact zeros: bam lands exactly on 0° or 180° */
+	if (inq == 0 && (q == 0 || q == 2))
+		return 0;
+
+	/* Poles: bam lands exactly on 90° or 270° */
+	if (inq == 0 && (q == 1 || q == 3))
+		return (q == 1) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+
+	/* Q1 (90°..180°) and Q3 (270°..360°): reflect and negate */
+	if (q == 1 || q == 3) {
+		inq = 0x4000u - inq;
+		sign = -1;
+	}
+
+	/* Now inq is in (0, 0x4000) = (0°, 90°) exclusive.
+	 * Split into first octant [0, 45°) and second octant [45°, 90°). */
+	if (inq < FR_TAN_OCTANT) {
+		/* First octant: direct table lookup + lerp.
+		 * inq is 13 bits; top FR_TAN_TABLE_BITS index the table,
+		 * bottom FR_TAN_FRAC_BITS drive interpolation. */
+		idx  = inq >> FR_TAN_FRAC_BITS;
+		frac = inq &  FR_TAN_FRAC_MASK;
+		lo = (s32)gFR_TAN_TAB_O[idx];
+		hi = (s32)gFR_TAN_TAB_O[idx + 1];
+		raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+
+		if (raw < 0x40) {
+			/* Near zero: redo interpolation with 4 extra bits of
+			 * precision to reduce rounding error when result is small. */
+			s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4;
+			s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4;
+			raw = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+			raw = (raw + 4) >> 3;        /* u0.19 → s15.16 with rounding */
+		} else {
+			raw <<= 1;                   /* u0.15 → s15.16              */
+		}
+	} else {
+		/* Second octant: tan(x) = 1 / tan(90° - x).
+		 * complement is in (0, 0x2000] = (0°, 45°]. */
+		u32 comp = 0x4000u - inq;
+
+		/* Look up tan(complement) from the table */
+		idx  = comp >> FR_TAN_FRAC_BITS;
+		frac = comp &  FR_TAN_FRAC_MASK;
+		lo = (s32)gFR_TAN_TAB_O[idx];
+		hi = (s32)gFR_TAN_TAB_O[idx + 1];
+		raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+
+		if (raw < 0x40) {
+			/* Near pole: redo interpolation with 4 extra bits of
+			 * precision. The reciprocal amplifies small interpolation
+			 * errors, so extra precision significantly helps here.
+			 * Result: (2^31 / raw_hp) << 4 = 2^35 / raw_hp. */
+			s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4;
+			s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4;
+			s32 raw_hp = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+			if (raw_hp < 32) {
+				raw = FR_TRIG_MAXVAL;
+			} else {
+				raw = (s32)((0x80000000u / (u32)raw_hp) << 4);
+			}
+		} else {
+			raw = (s32)(0x80000000u / (u32)raw);
+		}
+	}
+
+	return (sign < 0) ? -raw : raw;
+}
+
+/* fr_tan — radian-input tangent with full sub-BAM precision.
+ *
+ * Goes directly to the 65-entry octant tangent table with 16-bit
+ * interpolation precision. Sign from quadrant, magnitude from table.
+ * No s64 intermediates. One 32-bit division in the second-octant path.
+ *
+ * Architecture:
+ *   1. Sign: from the 0-based BAM quadrant index q (q=0,2 → +; q=1,3 → −)
+ *   2. Magnitude: from octant table lookup + reciprocal identity
+ *      - First octant [0,45°): direct table lerp
+ *      - Second octant [45°,90°): 1/tan(90°-x) via reciprocal
+ *   3. Return sign * magnitude */
+
+/* Internal: given a full s32 BAM, compute |tan| directly from the table.
+ * Returns the unsigned magnitude (always >= 0). */
+static s32 tan_mag_from_bam_full(s32 bam_full)
+{
+	u16 bam0 = (u16)(bam_full >> 16);
+	u32 frac_sub = (u32)bam_full & 0xFFFFu;
+
+	u32 q   = ((u32)bam0 >> 14) & 0x3u;
+	u32 inq = (u32)bam0 & 0x3FFFu;
+
+	/* Exact zeros: tan(0°) = tan(180°) = 0 */
+	if (inq == 0 && frac_sub == 0 && (q == 0 || q == 2))
+		return 0;
+
+	/* Exact poles: tan(90°) = tan(270°) → saturate */
+	if (inq == 0 && frac_sub == 0 && (q == 1 || q == 3))
+		return FR_TRIG_MAXVAL;
+
+	/* Mirror odd quadrants (Q1, Q3) into the [0, 90°) range.
+	 * After this, full_pos represents distance from the nearest zero. */
+	u32 full_pos;
+	if (q == 1 || q == 3)
+		full_pos = ((u32)(0x4000u - inq) << 16) - frac_sub;
+	else
+		full_pos = ((u32)inq << 16) + frac_sub;
+
+	/* Split at octant boundary (45° = 8192 BAM = 8192*65536 sub-BAM) */
+	s32 raw;
+	if (full_pos < ((u32)FR_TAN_OCTANT << 16)) {
+		/* First octant [0, 45°): direct table lookup.
+		 * 64 table intervals, each 2^23 sub-BAM units wide. */
+		u32 idx    = full_pos >> 23;
+		u32 frac16 = (full_pos >> 7) & 0xFFFFu;
+
+		s32 lo = (s32)gFR_TAN_TAB_O[idx];
+		s32 hi = (s32)gFR_TAN_TAB_O[idx + 1];
+		raw = lo + (s32)(((s32)(hi - lo) * (s32)frac16 + (1 << 15)) >> 16);
+
+		if (raw < 0x40) {
+			/* Near zero: redo with 4 extra bits of precision */
+			s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4;
+			s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4;
+			raw = lo4 + (s32)(((s32)(hi4 - lo4) * (s32)frac16 + (1 << 15)) >> 16);
+			raw = (raw + 4) >> 3;    /* u0.19 → s15.16 with rounding */
+		} else {
+			raw <<= 1;              /* u0.15 → s15.16              */
+		}
+	} else {
+		/* Second octant [45°, 90°): tan(x) = 1 / tan(90° - x).
+		 * Complement = distance from pole, in first-octant range. */
+		u32 comp = ((u32)FR_TRIG_QUADRANT << 16) - full_pos;
+
+		u32 idx    = comp >> 23;
+		u32 frac16 = (comp >> 7) & 0xFFFFu;
+
+		s32 lo = (s32)gFR_TAN_TAB_O[idx];
+		s32 hi = (s32)gFR_TAN_TAB_O[idx + 1];
+		raw = lo + (s32)(((s32)(hi - lo) * (s32)frac16 + (1 << 15)) >> 16);
+
+		if (raw < 0x40) {
+			/* Near pole: redo with 4 extra bits then reciprocal */
+			s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4;
+			s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4;
+			s32 raw_hp = lo4 + (s32)(((s32)(hi4 - lo4) * (s32)frac16 + (1 << 15)) >> 16);
+			if (raw_hp < 32)
+				raw = FR_TRIG_MAXVAL;
+			else
+				raw = (s32)((0x80000000u / (u32)raw_hp) << 4);
+		} else {
+			raw = (s32)(0x80000000u / (u32)raw);
+		}
+	}
+	return raw;
+}
+
+s32 fr_tan(s32 rad, u16 radix)
+{
+	if (rad == 0) return 0;
+	/* tan(-x) = -tan(x): factor out sign, reduce positive */
+	s32 r = normalize_to_r16(rad, radix);
+	s32 tan_sign = 1;
+	if (r < 0) { r = -r; tan_sign = -1; }
+	r = reduce_to_2pi(r);
+	/* Near-π small angle: tan(π + δ) = tan(δ) ≈ δ. */
+	s32 delta = r - FR_PI(16);
+	if (delta >= -256 && delta <= 256) {
+		return (tan_sign < 0) ? -delta : delta;
+	}
+	/* Full pipeline */
+	if (r > FR_PI(16))
+		r -= FR_TWO_PI(16);
+	s32 bam_full = rad_to_bam_full(r);
+
+	/* Sign from quadrant of the BAM position */
+	u32 q = ((u32)((u16)(bam_full >> 16)) >> 14) & 0x3u;
+	s32 sign = (q == 1 || q == 3) ? -tan_sign : tan_sign;
+
+	s32 mag = tan_mag_from_bam_full(bam_full);
+	return (sign < 0) ? -mag : mag;
+}
+
+/*=======================================================
+ * Degree-input trig: convert to u16 BAM via fr_deg_to_bam, then
+ * call the BAM-native functions. Cardinal angles are exact.
+ */
+
+s32 fr_cos_deg(s32 deg, u16 radix)
+{
+	if (radix == 0) return fr_cos_bam(FR_DEG2BAM_I(deg));
+	if (deg < 0) deg = -deg;
+	/* Exact cardinal angles */
+	s32 frac_mask = (1 << radix) - 1;
+	if ((deg & frac_mask) == 0) {
+		s32 rem = (deg >> radix) % 360;
+		if (rem == 0)   return  FR_TRIG_ONE;
+		if (rem == 90)  return  0;
+		if (rem == 180) return -FR_TRIG_ONE;
+		if (rem == 270) return  0;
+	}
+	/* Near 90° or 270° (cos=0 crossings): cos(90+δ) = -sin(δ) ≈ -δ·π/180,
+	 * cos(270+δ) = sin(δ) ≈ δ·π/180. Avoids BAM rounding error at zero. */
+	s32 d = normalize_to_r16(deg, radix);
+	if (d >= FR_D360_R16) { s32 n = d / FR_D360_R16; d -= n * FR_D360_R16; }
+	{
+		const s32 DEG_THRESH = 14000; /* ~0.21° at r16 */
+		s32 delta = d - FR_D90_R16;
+		if (delta >= -DEG_THRESH && delta <= DEG_THRESH) {
+			s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16);
+			return -dr;
+		}
+		delta = d - (FR_D90_R16 + FR_D180_R16);
+		if (delta >= -DEG_THRESH && delta <= DEG_THRESH) {
+			s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16);
+			return dr;
+		}
+	}
+	return fr_cos_bam(fr_deg_to_bam(deg, radix));
+}
+
+s32 fr_sin_deg(s32 deg, u16 radix)
+{
+	if (radix == 0) return fr_sin_bam(FR_DEG2BAM_I(deg));
+	s32 sign = 1;
+	if (deg < 0) { deg = -deg; sign = -1; }
+	/* Exact cardinal angles */
+	s32 frac_mask = (1 << radix) - 1;
+	if ((deg & frac_mask) == 0) {
+		s32 rem = (deg >> radix) % 360;
+		if (rem == 0)   return  0;
+		if (rem == 90)  return (sign < 0) ? -FR_TRIG_ONE :  FR_TRIG_ONE;
+		if (rem == 180) return  0;
+		if (rem == 270) return (sign < 0) ?  FR_TRIG_ONE : -FR_TRIG_ONE;
+	}
+	s32 v = fr_sin_bam(fr_deg_to_bam(deg, radix));
+	return (sign < 0) ? -v : v;
+}
+
+s32 FR_TanI(s32 deg)
+{
+	/* Exact pole: deg mod 180 == ±90. Sign matches input sign. */
+	s32 rem = deg % 180;
+	if (rem == 90 || rem == -90)
+		return (deg > 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+	return fr_tan_bam(FR_DEG2BAM_I(deg));
+}
+
+/* Internal: range-reduce degrees and return the full s32 BAM (upper 16 bits = integer
+ * BAM, lower 16 = sub-BAM). Used by fr_tan_deg, which passes deg >= 0 (sign stripped). */
+static s32 range_reduce_deg_bam_full(s32 deg, u16 radix)
+{
+	s32 d = normalize_to_r16(deg, radix);
+	u32 half_turn = 0;   /* becomes 0x80000000 (180° in full BAM) when d is folded by 180° */
+	if (d >= FR_D360_R16) d -= (d / FR_D360_R16) * FR_D360_R16;   /* d >= 0 → [0, 360) */
+	if (d >= FR_D180_R16) d -= FR_D360_R16;                       /* → [-180, 180)     */
+	if (d >= FR_D90_R16)      { d -= FR_D180_R16; half_turn = 0x80000000u; }
+	else if (d < -FR_D90_R16) { d += FR_D180_R16; half_turn = 0x80000000u; }
+	/* Sum in u32 (BAM is modulo 2^32): as s32 the offset is INT32_MIN, and adding a
+	 * negative deg_to_bam_full() result to it is signed overflow (undefined behavior). */
+	return (s32)(half_turn + (u32)deg_to_bam_full(d));
+}
+
+s32 fr_tan_deg(s32 deg, u16 radix)
+{
+	if (radix == 0) return FR_TanI(deg);
+	/* tan(-x) = -tan(x): factor out sign, reduce positive */
+	s32 tan_sign = 1;
+	if (deg < 0) { deg = -deg; tan_sign = -1; }
+	/* Exact cardinal angles: tan is exactly 0 or ±MAXVAL */
+	s32 frac_mask = (1 << radix) - 1;
+	if ((deg & frac_mask) == 0) {
+		s32 deg_int = deg >> radix;
+		s32 rem = deg_int % 180;
+		if (rem == 0)  return 0;
+		if (rem == 90) return tan_sign > 0 ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+	}
+	s32 bam_full = range_reduce_deg_bam_full(deg, radix);
+	u32 q = ((u32)((u16)(bam_full >> 16)) >> 14) & 0x3u;
+	s32 sign = (q == 1 || q == 3) ? -tan_sign : tan_sign;
+	s32 mag = tan_mag_from_bam_full(bam_full);
+	return (sign < 0) ? -mag : mag;
+}
+
+/*=======================================================
+ * FR_FixMuls (x*y signed, NOT saturated, round-to-nearest)
+ *
+ * Returns (x*y + 0x8000) >> 16 using a 64-bit intermediate: the shift is fixed
+ * at 16 bits, so s15.16 inputs give an s15.16 result. The user is responsible
+ * for tracking the radix point and for guaranteeing the result fits in 32 bits.
+ *
+ * Adds 0.5 LSB (0x8000) before the shift so the result rounds to
+ * nearest instead of truncating.
+ */
+s32 FR_FixMuls(s32 x, s32 y)
+{
+	int64_t v = (int64_t)x * (int64_t)y;
+	return (s32)((v + 0x8000) >> 16);
+}
+
+/*=======================================================
+ * FR_FixMulSat (x*y signed, SATURATED, round-to-nearest)
+ *
+ * Same semantics as FR_FixMuls but clamps to [INT32_MIN, INT32_MAX] on
+ * overflow instead of wrapping. The fixed-point radix is fixed at 16 bits
+ * (sM.16 inputs and output). Rounds to nearest (adds 0.5 LSB before shift).
+ */
+s32 FR_FixMulSat(s32 x, s32 y)
+{
+	int64_t v = ((int64_t)x * (int64_t)y + 0x8000) >> 16;
+	if (v >  (int64_t)0x7fffffff) return  FR_OVERFLOW_POS;
+	if (v < -(int64_t)0x80000000) return  FR_OVERFLOW_NEG;
+	return (s32)v;
+}
+
+/*=======================================================
+ * FR_FixAddSat (x+y signed, SATURATED)
+ *
+ * Returns x + y, or FR_OVERFLOW_POS / FR_OVERFLOW_NEG when the true sum does
+ * not fit in s32. The caller must align the radix points of x and y first.
+ */
+s32 FR_FixAddSat(s32 x, s32 y)
+{
+	/* Add in u32 so a wrapping sum is well defined (signed overflow is not). */
+	s32 sum = (s32)((u32)x + (u32)y);
+	/* Only same-sign operands can overflow, and the wrapped sum then has the
+	 * opposite sign. Two non-negative operands wrap to a value <= -2, so the
+	 * test must be sum < 0: a "<= 0" test would saturate the valid 0 + 0 = 0. */
+	if (x < 0 && y < 0 && sum >= 0)
+		return FR_OVERFLOW_NEG;
+	if (x >= 0 && y >= 0 && sum < 0)
+		return FR_OVERFLOW_POS;
+	return sum;
+}
+
+/* Inverse Trig
+ * acos via binary search of the BAM-native quadrant sine table.
+ *
+ * Algorithm: bring |input| into s0.15, then binary-search the ascending
+ * first-quadrant sine table for the interval bracketing |input| to get
+ * asin(|input|). acos = pi/2 - asin, mirrored to pi - angle if input < 0.
+ */
+/* FR_acos — returns radians at out_radix.
+ * Range: [0, pi].  Input is a cosine value at the given radix.
+ *
+ * Uses the 129-entry sine table in reverse: binary-search the ascending
+ * table to find asin(|input|), then acos = pi/2 - asin (with sign handling
+ * for the second quadrant).
+ */
+s32 FR_acos(s32 input, u16 radix, u16 out_radix)
+{
+	s32 v;
+	s16 sign;
+	s32 lo, hi, mid;
+	s32 idx, d, num, frac;
+	s32 input_abs;
+
+	/* Work with absolute value at the caller's radix */
+	sign = (s16)((input < 0) ? 1 : 0);
+	input_abs = sign ? -input : input;
+
+	/* Clamp at the caller's radix */
+	{
+		s32 one = (s32)1 << radix;
+		if (input_abs >= one)
+			return sign ? FR_CHRDX(FR_kPI, FR_kPREC, out_radix) : 0;
+	}
+
+	v = FR_CHRDX(input_abs, radix, FR_TRIG_PREC); /* |input| at s0.15 */
+
+	/* Small-angle fast path: when cos(θ) is close to 1.0, the sine table
+	 * has poor resolution near the top (entries close together).
+	 * Use acos(x) ≈ sqrt(2*(1-x)) instead. Threshold: v > sin_tab[121]
+	 * means the input is > cos(7*π/256) ≈ 0.9963. */
+	if (v > gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE - 8])
+	{
+		s32 one = (s32)1 << radix;
+		s32 one_minus_x = one - input_abs;           /* 1-|x| at caller radix */
+		s32 two_omx     = one_minus_x << 1;          /* 2(1-|x|) at caller radix */
+		s32 rad_native  = FR_sqrt(two_omx, radix);   /* radians at caller radix */
+		s32 rad_out     = FR_CHRDX(rad_native, radix, out_radix);
+		if (sign)
+			rad_out = FR_CHRDX(FR_kPI, FR_kPREC, out_radix) - rad_out;
+		return rad_out;
+	}
+
+	/* Binary search on the ascending sine table.
+	 * gFR_SIN_TAB_Q[0] = 0 (sin 0°), gFR_SIN_TAB_Q[128] = 32768 (sin 90°).
+	 *
+	 * Find the first index where table[idx] >= v. */
+	lo = 0;
+	hi = FR_TRIG_TABLE_SIZE;
+	while (lo < hi)
+	{
+		mid = (lo + hi) >> 1;
+		if ((s32)gFR_SIN_TAB_Q[mid] < v)
+			lo = mid + 1;
+		else
+			hi = mid;
+	}
+
+	/* lo is now the first index where table[lo] >= v.
+	 * The bracketing interval is [lo-1, lo] with table[lo-1] < v <= table[lo].
+	 * This gives us the asin angle; acos = pi/2 - asin. */
+	idx = lo;
+	if (idx <= 0)
+	{
+		idx = 0;
+		frac = 0;
+	}
+	else if (idx >= FR_TRIG_TABLE_SIZE)
+	{
+		idx = FR_TRIG_TABLE_SIZE - 1;
+		frac = 0;
+	}
+	else
+	{
+		/* Interpolate between table[idx-1] and table[idx].
+		 * d = table[idx] - table[idx-1]  (>= 0, sin increasing)
+		 * num = v - table[idx-1]          (how far past table[idx-1])
+		 */
+		d   = (s32)gFR_SIN_TAB_Q[idx] - (s32)gFR_SIN_TAB_Q[idx - 1];
+		num = v - (s32)gFR_SIN_TAB_Q[idx - 1];
+		if (d > 0)
+			frac = ((num << FR_TRIG_FRAC_BITS) + (d >> 1)) / d;
+		else
+			frac = 0;
+		idx = idx - 1;
+	}
+
+	{
+		/* asin_bam is the angle in first-quadrant BAM whose sin = v */
+		u16 asin_bam = (u16)(((u32)idx << FR_TRIG_FRAC_BITS) + (u32)frac);
+		/* acos = pi/2 - asin (in BAM: quadrant - asin_bam) */
+		u16 bam = (u16)(FR_TRIG_QUADRANT - asin_bam);
+		if (sign)
+			bam = (u16)(FR_BAM_HALF - bam);  /* mirror: pi - angle */
+		return FR_CHRDX(FR_Q2RAD(bam), 14, out_radix);
+	}
+}
+
+/* FR_asin — returns radians at out_radix. Range: [-pi/2, pi/2]. */
+s32 FR_asin(s32 input, u16 radix, u16 out_radix)
+{
+	/* asin(x) = pi/2 - acos(x) */
+	s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix);
+	return half_pi - FR_acos(input, radix, out_radix);
+}
+
+/* FR_atan2(y, x, out_radix) — full-circle arctangent, returns radians
+ * at the specified output radix (s32).
+ *
+ * Range: [-pi, pi]. Returns 0 for atan2(0,0).
+ *
+ * Implementation: normalise (x,y) via FR_hypot_fast8, then recover the
+ * angle with FR_asin or FR_acos (both use the 129-entry sine table).
+ * To stay in the well-conditioned region of each inverse function we
+ * switch at 45°:
+ *   |y| <= |x|  →  use asin(y/h)   — asin stable near 0
+ *   |y| >  |x|  →  use acos(x/h)   — acos stable near pi/2
+ * This keeps the derivative amplification factor below 1.414x everywhere.
+ */
+s32 FR_atan2(s32 y, s32 x, u16 out_radix)
+{
+	s32 ax, ay, h, q1_angle;
+
+	/* Axis cases — exact angles, no divide. */
+	if (x == 0)
+	{
+		if (y > 0) return  FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix);     /*  pi/2 */
+		if (y < 0) return -FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix);     /* -pi/2 */
+		return 0;
+	}
+	if (y == 0)
+		return (x > 0) ? 0 : FR_CHRDX(FR_kPI, FR_kPREC, out_radix);      /* 0 or pi */
+
+	ax = (x < 0) ? -x : x;
+	ay = (y < 0) ? -y : y;
+
+	/* Normalise so max(ax,ay) sits in [2^14, 2^16).  This gives
+	 * FR_hypot_fast8 enough integer bits for the shift-only segments
+	 * to produce an accurate ratio — critical when the raw inputs are
+	 * small (e.g. atan2(1,1) at radix 0).  Scaling both by the same
+	 * power of two doesn't change the angle. */
+	{
+		s32 mx = (ax > ay) ? ax : ay;
+		while (mx < (1L << 14)) { ax <<= 1; ay <<= 1; mx <<= 1; }
+		while (mx >= (1L << 16)) { ax >>= 1; ay >>= 1; mx >>= 1; }
+	}
+
+	h = FR_hypot_fast8((s32)ax, (s32)ay);
+	if (h == 0) return 0;  /* degenerate */
+
+	/* Compute the first-quadrant angle (positive, [0..pi/2]).
+	 * Divide produces a value in [0,1] at radix FR_TRIG_PREC (s0.15).
+	 *
+	 * Small-angle fast path: when the minor-axis ratio is small,
+	 * asin(x) ≈ x (error < x³/6).  Below ~5° the cubic term is
+	 * smaller than the table-lookup error, so the direct identity
+	 * is both faster and more accurate.  Threshold 2753 at r15
+	 * corresponds to sin(~4.8°) = 0.084. */
+	#define FR_ATAN2_SMALL  2753
+	if (ay <= ax)
+	{
+		/* angle in [0°..45°]: use asin(ay/h) — well-conditioned near 0 */
+		s32 sin_val = (s32)(((int64_t)ay << FR_TRIG_PREC) / h);
+		if (sin_val < FR_ATAN2_SMALL)
+			q1_angle = FR_CHRDX(sin_val, FR_TRIG_PREC, out_radix);
+		else
+			q1_angle = FR_asin(sin_val, FR_TRIG_PREC, out_radix);
+	}
+	else
+	{
+		/* angle in [45°..90°]: use acos(ax/h) — well-conditioned near pi/2 */
+		s32 cos_val = (s32)(((int64_t)ax << FR_TRIG_PREC) / h);
+		if (cos_val < FR_ATAN2_SMALL)
+		{
+			/* angle ≈ pi/2 - cos_val (symmetric small-angle identity) */
+			s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix);
+			q1_angle = half_pi - FR_CHRDX(cos_val, FR_TRIG_PREC, out_radix);
+		}
+		else
+			q1_angle = FR_acos(cos_val, FR_TRIG_PREC, out_radix);
+	}
+
+	/* Apply quadrant from signs of x and y.
+	 * q1_angle is always positive [0..pi/2]. */
+	{
+		s32 pi = FR_CHRDX(FR_kPI, FR_kPREC, out_radix);
+		if (x > 0)
+			return (y > 0) ? q1_angle : -q1_angle;
+		/* x < 0: mirror across y-axis */
+		return (y > 0) ? (pi - q1_angle) : (q1_angle - pi);
+	}
+}
+
+/* FR_atan(input, radix, out_radix) — arctangent of a single argument.
+ * Returns radians at out_radix, range [-pi/2, pi/2].
+ */
+s32 FR_atan(s32 input, u16 radix, u16 out_radix)
+{
+	s32 one = (s32)1 << radix;
+	return FR_atan2(input, one, out_radix);
+}
+
+/* Fractional power-of-two table: entry i = round(2^(i/64) * 65536), i.e.
+ * 2^f in s.16 fixed point for f = i/64 over [0, 1].  65 entries bound 64
+ * segments, so FR_pow2 can interpolate linearly between entry idx and
+ * idx + 1 for every idx in [0, 63].  Size: 260 bytes.
+ */
+static const u32 gFR_POW2_FRAC_TAB[65] = {
+     65536,  66250,  66971,  67700,  68438,  69183,  69936,  70698,
+     71468,  72246,  73032,  73828,  74632,  75444,  76266,  77096,
+     77936,  78785,  79642,  80510,  81386,  82273,  83169,  84074,
+     84990,  85915,  86851,  87796,  88752,  89719,  90696,  91684,
+     92682,  93691,  94711,  95743,  96785,  97839,  98905,  99982,
+    101070, 102171, 103283, 104408, 105545, 106694, 107856, 109031,
+    110218, 111418, 112631, 113858, 115098, 116351, 117618, 118899,
+    120194, 121502, 122825, 124163, 125515, 126882, 128263, 129660,
+    131072
+};
+
+/* FR_pow2(input, radix) — computes 2^(input/2^radix), result at same radix.
+ *
+ *   input  : exponent, fixed point with `radix` fractional bits.
+ *   radix  : fractional bits of both `input` and the result (0..30).
+ *   return : 2^input at `radix`; FR_OVERFLOW_POS when the result does not
+ *            fit in s32; 0 when floor(exponent) <= -30 (underflow).
+ *
+ * Algorithm: split input into floor(integer) and fractional part. The
+ * fractional part is in [0, 1) by construction (mathematical floor — the
+ * fractional part of -2.3 is +0.7, not -0.3). Then
+ *   2^(int + frac) = 2^int * 2^frac
+ * where 2^frac is looked up from a 65-entry table at radix 16 with linear
+ * interpolation, and 2^int is a shift.
+ *
+ * Worst-case absolute error: ~1e-5 over [-8, 8] (65-entry table).
+ * Linear interpolation leaves a small concavity error in each interval.
+ */
+s32 FR_pow2(s32 input, u16 radix)
+{
+	s32 flr, frac_full, idx, frac_lo, lo, hi, mant;
+	u32 mask = (radix > 0) ? (((u32)1 << radix) - 1) : 0;
+
+	/* Mathematical floor: for positive input it's input>>radix; for
+	 * negative input we need to round toward -infinity, not toward zero.
+	 */
+	if (input >= 0)
+	{
+		flr = (s32)((u32)input >> radix);
+		frac_full = (s32)((u32)input & mask);
+	}
+	else
+	{
+		/* Negate in unsigned arithmetic: -input is undefined for INT32_MIN. */
+		u32 neg = (u32)0 - (u32)input;
+		u32 nflr = neg >> radix;
+		s32 nfrc = (s32)(neg & mask);
+		if (nflr >= 30)
+			return 0;                   /* underflow; also keeps -(s32)nflr safe */
+		if (nfrc == 0)
+		{
+			flr = -(s32)nflr;
+			frac_full = 0;
+		}
+		else
+		{
+			flr = -(s32)nflr - 1;       /* floor toward -inf */
+			frac_full = (s32)((1L << radix) - nfrc);
+		}
+	}
+
+	/* frac_full is in [0, 2^radix). Re-radix it to s.16 for table lookup. */
+	if (radix > 16)
+		frac_full >>= (radix - 16);
+	else if (radix < 16)
+		frac_full <<= (16 - radix);
+	/* now frac_full is in [0, 65536) representing fractional in s.16. */
+
+	/* Top 6 bits index the table; bottom 10 are the interpolation fraction. */
+	idx     = frac_full >> 10;
+	frac_lo = frac_full & ((1L << 10) - 1);
+	lo = (s32)gFR_POW2_FRAC_TAB[idx];
+	hi = (s32)gFR_POW2_FRAC_TAB[idx + 1];
+	mant = lo + (((hi - lo) * frac_lo) >> 10);  /* mant in s.16, in [1.0, 2.0) */
+
+	if (flr >= 0)
+	{
+		/* Scale in 64 bits: mant << flr alone exceeds s32 once flr >= 15,
+		 * even when the re-radixed result would still fit (radix < 16).
+		 * mant < 2^17 and flr < 30 keep the product below 2^47, and the
+		 * re-radix adds at most 14 more bits for radix <= 30.
+		 * NOTE(review): assumes FR_CHRDX is a plain shift macro that works
+		 * on 64-bit operands — confirm against FR_math.h. */
+		int64_t wide;
+		if (flr >= 30)
+			return FR_OVERFLOW_POS;
+		wide = (int64_t)mant << flr;
+		wide = FR_CHRDX(wide, 16, radix);
+		if (wide > (int64_t)0x7FFFFFFF)
+			return FR_OVERFLOW_POS;
+		return (s32)wide;
+	}
+	else
+	{
+		/* mant >> -flr at radix 16, then re-radix. */
+		s32 sh = -flr;
+		s32 result;
+		if (sh >= 30)
+			return 0;                       /* underflow */
+		result = mant >> sh;
+		return FR_CHRDX(result, 16, radix);
+	}
+}
+
+/* log2 mantissa table: entry i = round(log2(1 + i/64) * 65536), i.e.
+ * log2(m) in s.16 fixed point for m = 1 + i/64 over [1, 2].  The 65th
+ * entry (log2(2) = 1.0 = 65536) lets FR_log2 interpolate between idx and
+ * idx + 1 without reading out of bounds.  Size: 260 bytes.
+ */
+static const u32 gFR_LOG2_MANT_TAB[65] = {
+        0,  1466,  2909,  4331,  5732,  7112,  8473,  9814,
+    11136, 12440, 13727, 14996, 16248, 17484, 18704, 19909,
+    21098, 22272, 23433, 24579, 25711, 26830, 27936, 29029,
+    30109, 31178, 32234, 33279, 34312, 35334, 36346, 37346,
+    38336, 39316, 40286, 41246, 42196, 43137, 44068, 44990,
+    45904, 46809, 47705, 48593, 49472, 50344, 51207, 52063,
+    52911, 53751, 54584, 55410, 56229, 57040, 57845, 58643,
+    59434, 60219, 60997, 61769, 62534, 63294, 64047, 64794,
+    65536
+};
+
+/* FR_log2(input, radix, output_radix) — log base 2 of a fixed-point number.
+ *
+ *   input        : value to take log2 of, treated as a positive sM.radix value.
+ *   radix        : number of fractional bits in `input`.
+ *   output_radix : number of fractional bits in the result.
+ *
+ * Returns FR_LOG2MIN for input <= 0 (log of zero/negative is undefined; we
+ * return a large negative sentinel rather than crash).
+ *
+ * Algorithm:
+ *   1. Find p, the position of the leading 1 bit of `input`.
+ *      log2(input) = p + log2(input / 2^p), where the second term is in
+ *      [0, 1) because (input / 2^p) is in [1, 2).
+ *   2. Normalize the mantissa to s1.30 by shifting `input` so its top bit
+ *      sits at bit 30 (so bits 29..24 are the upper 6 bits of m-1).
+ *   3. Look up log2(m) in the 65-entry table with linear interpolation
+ *      across the remaining 24 bits. Result is in s.16.
+ *   4. integer_part = (p - radix), then result = integer_part * 2^16 +
+ *      mantissa_log2.
+ *   5. Re-radix to the requested output_radix via FR_CHRDX.
+ *
+ * Worst-case absolute error: ~6e-5 in log2 units (65-entry table).
+ */
+s32 FR_log2(s32 input, u16 radix, u16 output_radix)
+{
+	s32 p, integer_part, idx, frac, lo, hi, mant_log2, result;
+	u32 m, u;
+
+	if (input <= 0)
+		return FR_LOG2MIN;
+
+	/* Step 1: find the position of the leading 1 bit. */
+	u = (u32)input;
+	p = 0;
+	while (u > 1)
+	{
+		u >>= 1;
+		p++;
+	}
+
+	/* Step 2: shift input so the leading 1 bit is at bit 30 (s1.30 mantissa).
+	 * Equivalently: m = input << (30 - p), where m is in [2^30, 2^31).
+	 * p cannot exceed 30 for a positive s32, so the right-shift arm only
+	 * ever shifts by zero.
+	 */
+	if (p >= 30)
+		m = (u32)input >> (p - 30);
+	else
+		m = (u32)input << (30 - p);
+
+	/* Step 3: table lookup. Subtract 2^30 to get the fractional part
+	 * (m_frac in [0, 2^30)). Index into the 65-entry table is the top 6
+	 * bits of m_frac; the lower 24 bits are the interpolation fraction.
+	 */
+	m -= (1u << 30);
+	idx  = (s32)(m >> 24);                    /* 6 bits  */
+	frac = (s32)(m & ((1u << 24) - 1));       /* 24 bits */
+	lo = (s32)gFR_LOG2_MANT_TAB[idx];
+	hi = (s32)gFR_LOG2_MANT_TAB[idx + 1];
+	mant_log2 = lo + (s32)(((int64_t)(hi - lo) * frac) >> 24);
+
+	/* Step 4: assemble at radix 16. integer_part is negative whenever
+	 * input < 1.0, and left-shifting a negative value is undefined in C,
+	 * so scale by multiplication instead of `<< 16`.
+	 */
+	integer_part = p - (s32)radix;
+	result = integer_part * ((s32)1 << 16) + mant_log2;
+
+	/* Step 5: re-radix to output_radix. */
+	return FR_CHRDX(result, 16, output_radix);
+}
+
+/* FR_ln - natural logarithm of a fixed-point value.
+ *
+ * Takes FR_log2(input, radix, output_radix) and rescales it with
+ * FR_MULK28 by the constant FR_krLOG2E_28. Arguments have the same
+ * meaning as for FR_log2; the result is at output_radix.
+ * NOTE(review): for input <= 0 the FR_LOG2MIN sentinel returned by
+ * FR_log2 is passed through FR_MULK28 as well — confirm callers do not
+ * compare the result against FR_LOG2MIN.
+ */
+s32 FR_ln(s32 input, u16 radix, u16 output_radix)
+{
+	s32 log2_val;
+
+	log2_val = FR_log2(input, radix, output_radix);
+	return FR_MULK28(log2_val, FR_krLOG2E_28);
+}
+
+/* FR_log10 - base-10 logarithm of a fixed-point value.
+ *
+ * Takes FR_log2(input, radix, output_radix) and rescales it with
+ * FR_MULK28 by the constant FR_krLOG2_10_28. Arguments have the same
+ * meaning as for FR_log2; the result is at output_radix.
+ * NOTE(review): for input <= 0 the FR_LOG2MIN sentinel returned by
+ * FR_log2 is passed through FR_MULK28 as well — confirm callers do not
+ * compare the result against FR_LOG2MIN.
+ */
+s32 FR_log10(s32 input, u16 radix, u16 output_radix)
+{
+	s32 log2_val;
+
+	log2_val = FR_log2(input, radix, output_radix);
+	return FR_MULK28(log2_val, FR_krLOG2_10_28);
+}
+
+#ifndef FR_NO_PRINT
+/***************************************
+ * FR_printNumD - write a decimal integer with space padding.
+ *
+ * Equivalent to "%*d" in printf, modulo the return convention.
+ *
+ *   f       : per-character output function (e.g. putchar). Must not be NULL.
+ *   n       : signed integer to print.
+ *   pad     : minimum field width; spaces are prepended to reach this width.
+ *
+ * Returns the number of characters written on success, or -1 if `f` is NULL.
+ * The return value of `f` itself is ignored.
+ */
+int FR_printNumD(int (*f)(char), int n, int pad)
+{
+	unsigned int mag, t, p;
+	int written = 0, digits = 1;
+	int neg = (n < 0);
+	int total, i;
+
+	if (!f)
+		return -1;
+
+	/* |n| via unsigned negation: well defined for INT_MIN even on targets
+	 * where long is no wider than int (there -(long)n would overflow). */
+	mag = neg ? (0u - (unsigned int)n) : (unsigned int)n;
+
+	/* Count decimal digits in mag (always at least 1 for n=0). */
+	for (t = mag; t >= 10; t /= 10)
+		digits++;
+
+	/* Pad with spaces. The total width includes the sign. */
+	total = digits + neg;
+	while (pad-- > total)
+	{
+		f(' ');
+		written++;
+	}
+
+	if (neg)
+	{
+		f('-');
+		written++;
+	}
+
+	/* Print digits MSB first; p starts at 10^(digits-1), the largest
+	 * power of 10 that does not exceed mag (or 1 when mag is 0). */
+	p = 1;
+	for (i = 1; i < digits; i++)
+		p *= 10;
+	for (;;)
+	{
+		f((char)('0' + (mag / p) % 10));
+		written++;
+		if (p == 1)
+			break;
+		p /= 10;
+	}
+
+	return written;
+}
+
+/***************************************
+ * FR_printNumF - write a fixed-point number as a decimal floating-point string.
+ *
+ *   f      : per-character output function. Must not be NULL.
+ *   n      : signed fixed-point value at the given radix.
+ *   radix  : number of fractional bits in `n` (0..31; it is used directly
+ *            as a shift count on 32-bit values).
+ *   pad    : minimum field width (including sign and decimal point).
+ *   prec   : number of fractional digits to print.
+ *
+ * Returns the number of characters written on success, -1 if `f` is NULL.
+ *
+ * Rounding policy: truncates fractional digits beyond `prec` (no rounding).
+ */
+int FR_printNumF(int (*f)(char), s32 n, int radix, int pad, int prec)
+{
+	unsigned int mag_int, t, p;
+	u32 un, mag_frac, frac_mask;
+	int written = 0, int_digits = 1;
+	int neg = (n < 0);
+	int total, i;
+
+	if (!f)
+		return -1;
+
+	frac_mask = (radix > 0) ? (((u32)1 << radix) - 1) : 0;
+
+	/* |n| as unsigned; negating in 64 bits keeps INT32_MIN well defined. */
+	un = neg ? (u32)(-(int64_t)n) : (u32)n;
+	mag_int  = (unsigned int)(un >> radix);
+	mag_frac = un & frac_mask;
+
+	/* Count integer digits. */
+	for (t = mag_int; t >= 10; t /= 10)
+		int_digits++;
+
+	/* Total visible width = sign + int + (dot + prec digits if prec>0). */
+	total = int_digits + neg + ((prec > 0) ? (1 + prec) : 0);
+	while (pad-- > total)
+	{
+		f(' ');
+		written++;
+	}
+
+	if (neg)
+	{
+		f('-');
+		written++;
+	}
+
+	/* Print integer part, most significant digit first. */
+	p = 1;
+	for (i = 1; i < int_digits; i++)
+		p *= 10;
+	for (;;)
+	{
+		f((char)('0' + (mag_int / p) % 10));
+		written++;
+		if (p == 1)
+			break;
+		p /= 10;
+	}
+
+	/* Print fractional part. Extract one decimal digit at a time:
+	 * frac' = frac * 10
+	 * digit = frac' >> radix
+	 * frac  = frac' & frac_mask
+	 */
+	if (prec > 0)
+	{
+		f('.');
+		written++;
+		while (prec-- > 0)
+		{
+			/* mag_frac < 2^radix, so mag_frac * 10 needs up to radix + 4
+			 * bits. Keep the product in 64 bits: truncating it to 32 bits
+			 * drops the digit's high bits once radix >= 29. */
+			uint64_t scaled = (uint64_t)mag_frac * 10;
+			int digit = (int)(scaled >> radix);   /* always in [0, 9] */
+			mag_frac = (u32)scaled & frac_mask;
+			f((char)('0' + digit));
+			written++;
+		}
+	}
+
+	return written;
+}
+
+/***************************************
+ * FR_printNumH - emit an integer as lowercase hexadecimal.
+ *
+ *   f          : per-character output function. Must not be NULL.
+ *   n          : integer to print; its bit pattern is read as unsigned.
+ *   showPrefix : non-zero to emit "0x" before the digits.
+ *
+ * Always writes 2 * sizeof(int) hex digits (leading zeros included),
+ * most significant nibble first.
+ *
+ * Returns the number of characters written on success, -1 if f is NULL.
+ */
+int FR_printNumH(int (*f)(char), int n, int showPrefix)
+{
+	const unsigned int bits = (unsigned int)n;
+	int nibble, value;
+	int count = 0;
+
+	if (!f)
+		return -1;
+
+	if (showPrefix)
+	{
+		f('0');
+		f('x');
+		count = 2;
+	}
+
+	/* Walk the nibbles from the top of the word down to nibble 0. */
+	for (nibble = (int)((sizeof(int) << 1) - 1); nibble >= 0; nibble--)
+	{
+		value = (int)((bits >> (nibble << 2)) & 0xf);
+		if (value > 9)
+			value = value - 0xa + 'a';
+		else
+			value = value + '0';
+		f((char)value);
+		count++;
+	}
+
+	return count;
+}
+
+/*=======================================================
+ * FR_numstr — convert a decimal string to a fixed-point value.
+ *
+ * Runtime inverse of FR_printNumF: text such as "12.34" or "-0.05" plus
+ * a radix (number of fractional bits) yields the s32 fixed-point value.
+ *
+ *   s     : NUL-terminated text; may be NULL.
+ *   radix : fractional bits of the result.
+ *
+ * Accepted syntax:
+ *   - Leading spaces and tabs are skipped.
+ *   - Optional sign ('+' or '-').
+ *   - Integer digits, then optionally '.' and fractional digits; only the
+ *     first 9 fractional digits contribute, further digits are skipped.
+ *   - Parsing stops at the first character that does not fit.
+ *   - The fraction is truncated (integer division), not rounded.
+ *   - No malloc, no strtod, no libm.
+ *
+ * Returns 0 for NULL or empty input.
+ */
+s32 FR_numstr(const char *s, u16 radix)
+{
+    static const s32 scale10[10] = {
+        1L, 10L, 100L, 1000L, 10000L,
+        100000L, 1000000L, 10000000L, 100000000L, 1000000000L
+    };
+    const char *p = s;
+    s32 whole = 0, frac = 0, value;
+    int nfrac = 0, negative = 0;
+
+    if (!p || !*p)
+        return 0;
+
+    /* Leading blanks. */
+    while (*p == ' ' || *p == '\t')
+        p++;
+
+    /* Optional sign. */
+    if (*p == '-')
+    {
+        negative = 1;
+        p++;
+    }
+    else if (*p == '+')
+    {
+        p++;
+    }
+
+    /* Integer digits. */
+    for (; *p >= '0' && *p <= '9'; p++)
+        whole = whole * 10 + (*p - '0');
+
+    /* Fractional digits: accumulate the first nine, skip the rest. */
+    if (*p == '.')
+    {
+        for (p++; *p >= '0' && *p <= '9'; p++)
+        {
+            if (nfrac < 9)
+            {
+                frac = frac * 10 + (*p - '0');
+                nfrac++;
+            }
+        }
+    }
+
+    /* frac / 10^nfrac, scaled by 2^radix in 64 bits before dividing. */
+    value = whole << radix;
+    if (nfrac > 0)
+        value += (s32)(((int64_t)frac << radix) / scale10[nfrac]);
+
+    return negative ? -value : value;
+}
+#endif /* FR_NO_PRINT */
+
+/*=======================================================
+ * Square root and hypot
+ *
+ * fr_isqrt64 is a private helper: digit-by-digit ("shift-and-subtract")
+ * integer square root of a 64-bit value. The main loop produces
+ * floor(sqrt(n)) and leaves the remainder in n; a final comparison of
+ * that remainder against the root rounds the result to nearest.
+ * No division is used. At most 32 iterations.
+ */
+static u32 fr_isqrt64(uint64_t n)
+{
+	uint64_t res = 0;
+	uint64_t probe;
+
+	/* Start at the largest power of four that does not exceed n
+	 * (probe ends up 0 when n is 0, which skips the main loop). */
+	for (probe = (uint64_t)1 << 62; probe > n; probe >>= 2)
+		;
+
+	for (; probe != 0; probe >>= 2)
+	{
+		uint64_t candidate = res + probe;
+		res >>= 1;
+		if (n >= candidate)
+		{
+			n -= candidate;
+			res += probe;
+		}
+	}
+
+	/* n now holds the remainder. remainder > res means the true root is
+	 * past res + 0.5, so res + 1 is the nearer integer. */
+	if (n > res)
+		res++;
+	return (u32)res;
+}
+
+/*=======================================================
+ * FR_sqrt - square root of a fixed-radix value.
+ *
+ *   input  : value at radix `radix`. Must be >= 0.
+ *   radix  : fractional bits of both the input and the result.
+ *   return : sqrt(input) at radix `radix`, or FR_DOMAIN_ERROR if input < 0.
+ *
+ * Math: for a real value v = input_fp / 2^r, the result at radix r is
+ *   sqrt(v) * 2^r = sqrt(input_fp * 2^r)
+ * so the integer square root is taken of (input_fp << radix) held in a
+ * 64-bit accumulator. Valid for any non-negative s32 input and any radix
+ * in [0, 30].
+ *
+ * Precision: round-to-nearest; worst-case absolute error is <= 0.5 LSB
+ * at the requested radix. The result is non-negative for non-negative
+ * input and monotone in input.
+ *
+ * Domain errors: input < 0 returns FR_DOMAIN_ERROR (= INT32_MIN), which
+ * the caller can test for with `result == FR_DOMAIN_ERROR`.
+ *
+ * Side effects: none. Pure function.
+ */
+s32 FR_sqrt(s32 input, u16 radix)
+{
+	if (input > 0)
+		return (s32)fr_isqrt64((uint64_t)(u32)input << radix);
+
+	/* Zero maps to zero; anything below zero is outside the domain. */
+	if (input == 0)
+		return 0;
+	return FR_DOMAIN_ERROR;
+}
+
+/*=======================================================
+ * FR_hypot - sqrt(x*x + y*y) without intermediate overflow.
+ *
+ *   x, y   : values at radix `radix`
+ *   radix  : fractional bits of inputs AND result
+ *   return : sqrt(x*x + y*y) at radix `radix`, saturated to
+ *            FR_OVERFLOW_POS when the magnitude exceeds INT32_MAX.
+ *
+ * Math: x*x + y*y is naturally at radix 2*radix; isqrt of a 2r-radix
+ * value yields an r-radix result, so no extra shifting is needed. The
+ * u64 accumulator can hold (INT32_MAX^2)*2 = ~2^63, so (x*x + y*y) never
+ * overflows for any s32 inputs.
+ *
+ * The root itself can reach sqrt(2) * 2^31 (about 3.04e9) when both
+ * inputs are near full scale. That fits in u32 but not in s32, so it is
+ * clamped rather than cast straight to a negative number.
+ *
+ * Precision: round-to-nearest. Worst-case absolute error <= 0.5 LSB
+ * at the requested radix (when not saturated).
+ *
+ * Side effects: none. Pure function.
+ */
+s32 FR_hypot(s32 x, s32 y, u16 radix)
+{
+	uint64_t xx = (uint64_t)((int64_t)x * (int64_t)x);
+	uint64_t yy = (uint64_t)((int64_t)y * (int64_t)y);
+	u32 mag;
+
+	(void)radix; /* the 2*radix in xx+yy cancels with isqrt's halving */
+
+	mag = fr_isqrt64(xx + yy);
+	if (mag > (u32)0x7FFFFFFF)
+		return FR_OVERFLOW_POS;
+	return (s32)mag;
+}
+
+/*=======================================================
+ * FR_hypot_fast8 — 8-segment piecewise-linear magnitude approximation.
+ *
+ *   x, y   : signed values; both are replaced by their absolute values.
+ *   return : approximate sqrt(x*x + y*y), in the same units as the inputs.
+ *
+ * With hi = max(|x|,|y|), lo = min(|x|,|y|) and β = lo/hi, one of eight
+ * linear fits a*hi + b*lo is chosen by comparing lo against shifted
+ * copies of hi; each coefficient is built from sums and differences of
+ * right shifts.
+ *
+ * Shift-only, no multiply, no 64-bit.  Based on the piecewise-linear
+ * method described in US Patent 6,567,777 B1 (Chatterjee, expired).
+ * Peak error: ~0.10%.
+ *
+ * NOTE(review): the result is accumulated in s32 and a + b is about 1.41
+ * in the top segment, so inputs with both |x| and |y| near INT32_MAX can
+ * overflow the sum — confirm callers keep inputs below roughly 2^30.
+ */
+s32 FR_hypot_fast8(s32 x, s32 y)
+{
+    s32 hi, lo;
+
+    /* absolute values (clamp INT32_MIN to INT32_MAX to avoid UB) */
+    if (x < 0) x = (x == (s32)0x80000000) ? 0x7FFFFFFF : -x;
+    if (y < 0) y = (y == (s32)0x80000000) ? 0x7FFFFFFF : -y;
+
+    /* hi = max(|x|,|y|), lo = min(|x|,|y|) */
+    if (x > y) { hi = x; lo = y; }
+    else       { hi = y; lo = x; }
+
+    /* both inputs zero: nothing to approximate */
+    if (hi == 0) return 0;
+
+    /* 8 piecewise-linear segments: dist ≈ a*hi + b*lo.
+     * Boundaries at β = 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875.
+     * The tests form a binary search over β: first 0.5, then 0.25/0.75,
+     * then the remaining odd eighths. */
+    if ((hi >> 1) < lo) {
+        /* β in (0.5, 1.0] */
+        if (lo > hi - (hi >> 2)) {
+            /* β in (0.75, 1.0] */
+            if (lo > hi - (hi >> 3))                   /* β > 0.875 */
+                /* a≈0.7305, b≈0.6836 */
+                return hi - (hi >> 2) - (hi >> 6) - (hi >> 8)
+                     + lo - (lo >> 2) - (lo >> 4) - (lo >> 8);
+            else                                        /* β in (0.75, 0.875] */
+                /* a≈0.7803, b≈0.6262 */
+                return hi - (hi >> 2) + (hi >> 5) - (hi >> 10)
+                     + (lo >> 1) + (lo >> 3) + (lo >> 10) + (lo >> 12);
+        } else {
+            /* β in (0.5, 0.75] */
+            if (lo > hi - (hi >> 1) + (hi >> 3))       /* β > 0.625 */
+                /* a≈0.8281, b≈0.5630 */
+                return hi - (hi >> 2) + (hi >> 4) + (hi >> 6)
+                     + (lo >> 1) + (lo >> 4) + (lo >> 11);
+            else                                        /* β in (0.5, 0.625] */
+                /* a≈0.8728, b≈0.4893 */
+                return hi - (hi >> 3) - (hi >> 9) - (hi >> 12)
+                     + (lo >> 1) - (lo >> 6) + (lo >> 8) + (lo >> 10);
+        }
+    } else {
+        /* β in [0, 0.5] */
+        if ((hi >> 2) < lo) {
+            /* β in (0.25, 0.5] */
+            if ((hi >> 1) - (hi >> 3) < lo)             /* β > 0.375 */
+                /* a≈0.9180, b≈0.3984 */
+                return hi - (hi >> 4) - (hi >> 6) - (hi >> 8)
+                     + (lo >> 1) - (lo >> 3) + (lo >> 5) - (lo >> 7);
+            else                                        /* β in (0.25, 0.375] */
+                /* a≈0.9551, b≈0.2988 */
+                return hi - (hi >> 4) + (hi >> 6) + (hi >> 9)
+                     + (lo >> 2) + (lo >> 4) - (lo >> 6) + (lo >> 9);
+        } else {
+            /* β in [0, 0.25] */
+            if ((hi >> 3) < lo)                         /* β in (0.125, 0.25] */
+                /* a≈0.9839, b≈0.1838 */
+                return hi - (hi >> 6) - (hi >> 11)
+                     + (lo >> 2) - (lo >> 4) - (lo >> 8) + (lo >> 12);
+            else                                        /* β in [0, 0.125] */
+                /* a≈0.9990, b≈0.0620 */
+                return hi - (hi >> 10)
+                     + (lo >> 4) - (lo >> 11);
+        }
+    }
+}
+
+#ifndef FR_NO_WAVES
+/*=======================================================
+ * Wave generators — synth-style fixed-shape waveforms.
+ *
+ * All wave functions take a u16 BAM phase in [0, 65535] (a full cycle)
+ * and return s16 in s0.15 format, clamped to [-32767, +32767] to match
+ * the trig amplitude convention used by fr_cos_bam / fr_sin_bam.
+ *
+ * Use FR_HZ2BAM_INC(hz, sample_rate) to compute a phase increment for
+ * a given output frequency, then accumulate it (mod 2^16) per sample.
+ *
+ * Side effects: pure functions (except fr_wave_noise which advances a
+ * caller-provided LFSR state pointer).
+ */
+
+/* fr_wave_sqr - square wave with a 50% duty cycle.
+ * Returns +32767 while phase is below 0x8000 (first half-cycle) and
+ * -32767 from 0x8000 onward.
+ */
+s16 fr_wave_sqr(u16 phase)
+{
+	if (phase >= 0x8000)
+		return (s16)-32767;
+	return (s16)32767;
+}
+
+/* fr_wave_pwm - pulse wave with adjustable duty.
+ * `duty` is a BAM threshold: the output is +32767 while phase < duty and
+ * -32767 otherwise.
+ *   duty = 0      → never high
+ *   duty = 0x8000 → 50% duty (matches fr_wave_sqr)
+ *   duty = 0xffff → high except for the single step phase = 0xffff
+ */
+s16 fr_wave_pwm(u16 phase, u16 duty)
+{
+	if (phase >= duty)
+		return (s16)-32767;
+	return (s16)32767;
+}
+
+/* fr_wave_saw - rising sawtooth.
+ * Output is phase - 0x8000: a linear ramp reaching 0 at phase=0x8000 and
+ * +32767 at phase=0xffff. phase=0 is the only input whose raw value
+ * (-32768) falls outside the symmetric range, so it is reported as
+ * -32767, the same value as phase=1.
+ */
+s16 fr_wave_saw(u16 phase)
+{
+	if (phase == 0)
+		return (s16)-32767;
+	return (s16)((s32)phase - (s32)0x8000);
+}
+
+/* fr_wave_tri - symmetric triangle wave.
+ * The cycle is two mirrored halves, each a rise followed by a fall:
+ *   [0, 0x4000)       : 0 → +peak
+ *   [0x4000, 0x8000)  : +peak → 0
+ *   [0x8000, 0xc000)  : 0 → -peak
+ *   [0xc000, 0x10000) : -peak → 0
+ * The raw formula yields 32768 at the exact peak phases; the magnitude
+ * is clamped to 32767 before the sign is applied.
+ */
+s16 fr_wave_tri(u16 phase)
+{
+	/* Position inside the current half-cycle, 0 .. 0x7fff. */
+	s32 pos = (s32)(phase & 0x7fff);
+	s32 mag;
+
+	/* Rise over the first quarter, fall over the second. */
+	if (pos < 0x4000)
+		mag = pos << 1;
+	else
+		mag = (0x8000 - pos) << 1;
+
+	if (mag > 32767)
+		mag = 32767;
+
+	/* The second half-cycle is the first one negated. */
+	if (phase >= 0x8000)
+		return (s16)-mag;
+	return (s16)mag;
+}
+
+/* fr_wave_tri_morph - triangle with adjustable peak position.
+ *
+ *   phase       : u16 BAM
+ *   break_point : u16 BAM at which the output reaches its peak.
+ *
+ * The output climbs linearly from 0 toward 32767 over [0, break_point)
+ * and descends linearly from 32767 to 0 over [break_point, 0xffff], so
+ * the rising and falling slopes can differ.
+ *
+ *   break_point = 0x8000  → symmetric triangle
+ *   break_point = 0xffff  → rising sawtooth (instant fall)
+ *   break_point = 0x0001  → falling sawtooth (instant rise)
+ *   break_point = 0       → degenerate; handled as 1 to avoid div-by-zero
+ *
+ * The result is unipolar, in [0, 32767]. Subtract 16384 and double it if
+ * a bipolar signal is wanted.
+ *
+ * Cost: one 32-bit divide per sample. On Cortex-M3+ this is ~10-20
+ * cycles. On 8051 / MSP430 it is much slower; pre-compute slopes if
+ * those targets matter to you.
+ */
+s16 fr_wave_tri_morph(u16 phase, u16 break_point)
+{
+	u32 peak_at = (break_point != 0) ? (u32)break_point : 1u;
+	u32 num, den, out;
+
+	if ((u32)phase < peak_at)
+	{
+		/* rising leg: distance from 0, scaled by the leg length */
+		num = (u32)phase;
+		den = peak_at;
+	}
+	else
+	{
+		/* falling leg: distance from 0xffff, scaled by the leg length */
+		den = (u32)0xffff - peak_at;
+		if (den == 0)
+			return 32767;       /* peak sits on the last phase step */
+		num = (u32)0xffff - (u32)phase;
+	}
+
+	out = (u32)((num * 32767UL) / den);
+	return (s16)((out > 32767) ? 32767 : out);
+}
+
+/* fr_wave_noise - pseudorandom noise from an LFSR.
+ *
+ *   state : caller-owned u32 register, updated on every call. It must
+ *           start non-zero (a zero register never changes). 0xACE1u is
+ *           a common seed; any non-zero constant works. A NULL pointer
+ *           yields 0.
+ *
+ * Returns the next s16 sample in s0.15 (±32767 range, white-ish).
+ * Implementation: 32-bit Galois LFSR — shift right by one, and XOR in
+ * the tap mask 0xD0000001 when the bit shifted out was 1 (documented
+ * period 2^32 - 1 samples).
+ *
+ * Quality: "fast white noise" for synth use. NOT cryptographically
+ * secure. For better statistical properties (FFT flatness etc.) layer a
+ * longer LFSR or use a separate PRNG.
+ */
+s16 fr_wave_noise(u32 *state)
+{
+	u32 reg;
+	s32 sample;
+
+	if (!state)
+		return 0;
+
+	reg = *state >> 1;
+	if (*state & 1u)
+		reg ^= 0xD0000001u;
+	*state = reg;
+
+	/* Upper 16 bits, re-biased around zero; -32768 is pulled in to -32767. */
+	sample = (s32)((reg >> 16) & 0xffffu) - 32768;
+	return (s16)((sample < -32767) ? -32767 : sample);
+}
+
+/*=======================================================
+ * ADSR envelope generator
+ *
+ * Linear-segment Attack-Decay-Sustain-Release envelope. State is held
+ * in caller-allocated fr_adsr_t struct (no global state, no malloc).
+ *
+ * Lifecycle:
+ *   1. Caller allocates an fr_adsr_t (stack or static).
+ *   2. fr_adsr_init() once per patch with attack/decay/release durations
+ *      in samples and a sustain level in s0.15.
+ *   3. fr_adsr_trigger() on note-on. Output rises 0 -> peak over `atk`
+ *      samples, falls peak -> sustain over `dec` samples, then holds.
+ *   4. fr_adsr_release() on note-off. Output falls current -> 0 over a
+ *      time controlled by the release rate (rate, not duration: the
+ *      time depends on where in the envelope we are).
+ *   5. fr_adsr_step() once per audio sample to read the current value.
+ *
+ * Internal precision: levels are stored as s32 in s1.30 format so even
+ * very long envelopes (e.g. 48000-sample attack at 48 kHz = 1 second)
+ * have a non-zero per-sample increment. Output is converted to s0.15.
+ *
+ * Saturation: the envelope state machine is self-clamping; level cannot
+ * escape [0, 1<<30]. Output is in [0, 32767].
+ */
+
+#define FR_ADSR_PEAK_S130 ((s32)1 << 30)
+
+/* fr_adsr_init - configure an envelope and leave it idle at level 0.
+ *
+ *   env                : envelope state to fill in; NULL is ignored.
+ *   attack_samples     : samples to rise 0 -> peak (0 = one step).
+ *   decay_samples      : samples to fall peak -> sustain (0 = one step).
+ *   sustain_level_s015 : sustain level in s0.15; negative values become 0.
+ *   release_samples    : samples a release from peak would take (0 = one
+ *                        step).
+ *
+ * Per-sample steps are integer quotients in s1.30. A quotient that
+ * truncates to 0 (segment longer than the level span it covers, e.g.
+ * sustain = 32767 with decay_samples > 32768) would leave fr_adsr_step
+ * stuck in that stage forever, so every step is raised to at least 1.
+ */
+void fr_adsr_init(fr_adsr_t *env,
+                  u32 attack_samples,
+                  u32 decay_samples,
+                  s16 sustain_level_s015,
+                  u32 release_samples)
+{
+	s32 decay_span;
+
+	if (!env)
+		return;
+	env->state   = FR_ADSR_IDLE;
+	env->level   = 0;
+
+	/* sustain_level_s015 is s16 so its upper bound (32767) is already the
+	 * type's max; only the lower bound needs an explicit clamp. */
+	if (sustain_level_s015 < 0)
+		sustain_level_s015 = 0;
+	/* Convert s0.15 -> s1.30 by shifting left 15. */
+	env->sustain = (s32)sustain_level_s015 << 15;
+
+	/* Always >= 32768 because sustain tops out at 32767 << 15. */
+	decay_span = FR_ADSR_PEAK_S130 - env->sustain;
+
+	/* Divide as unsigned: casting a sample count above INT32_MAX to s32
+	 * would turn the divisor negative. */
+	env->attack_inc  = (attack_samples  > 0)
+	    ? (s32)((u32)FR_ADSR_PEAK_S130 / attack_samples)
+	    : FR_ADSR_PEAK_S130;
+	env->decay_dec   = (decay_samples   > 0)
+	    ? (s32)((u32)decay_span / decay_samples)
+	    : decay_span;
+	env->release_dec = (release_samples > 0)
+	    ? (s32)((u32)FR_ADSR_PEAK_S130 / release_samples)
+	    : FR_ADSR_PEAK_S130;
+
+	/* Guarantee forward progress in every stage. */
+	if (env->attack_inc < 1)
+		env->attack_inc = 1;
+	if (env->decay_dec < 1)
+		env->decay_dec = 1;
+	if (env->release_dec < 1)
+		env->release_dec = 1;
+}
+
+/* fr_adsr_trigger - note-on: restart the envelope from level 0 in the
+ * attack stage. A NULL env is ignored.
+ */
+void fr_adsr_trigger(fr_adsr_t *env)
+{
+	if (env)
+	{
+		env->level = 0;
+		env->state = FR_ADSR_ATTACK;
+	}
+}
+
+/* fr_adsr_release - note-off: switch to the release stage, keeping the
+ * current level as the starting point. A NULL env is ignored.
+ */
+void fr_adsr_release(fr_adsr_t *env)
+{
+	if (env)
+		env->state = FR_ADSR_RELEASE;
+}
+
+/* fr_adsr_step - advance the envelope by one sample.
+ *
+ * Moves the s1.30 level by the current stage's step, switches stage when
+ * a target is reached (attack -> decay at peak, decay -> sustain at the
+ * sustain level, release -> idle at 0), and returns the new level
+ * converted to s0.15 and clamped to [0, 32767]. Returns 0 for a NULL env.
+ */
+s16 fr_adsr_step(fr_adsr_t *env)
+{
+	s32 lvl;
+
+	if (!env)
+		return 0;
+
+	lvl = env->level;
+	switch (env->state)
+	{
+	case FR_ADSR_ATTACK:
+		lvl += env->attack_inc;
+		if (lvl >= FR_ADSR_PEAK_S130)
+		{
+			lvl = FR_ADSR_PEAK_S130;
+			env->state = FR_ADSR_DECAY;
+		}
+		break;
+
+	case FR_ADSR_DECAY:
+		lvl -= env->decay_dec;
+		if (lvl <= env->sustain)
+		{
+			lvl = env->sustain;
+			env->state = FR_ADSR_SUSTAIN;
+		}
+		break;
+
+	case FR_ADSR_SUSTAIN:
+		lvl = env->sustain;
+		break;
+
+	case FR_ADSR_RELEASE:
+		lvl -= env->release_dec;
+		if (lvl <= 0)
+		{
+			lvl = 0;
+			env->state = FR_ADSR_IDLE;
+		}
+		break;
+
+	case FR_ADSR_IDLE:
+	default:
+		lvl = 0;
+		break;
+	}
+	env->level = lvl;
+
+	/* s1.30 -> s0.15 is a right shift by 15; the peak (1 << 30) maps to
+	 * 32768, hence the upper clamp. */
+	lvl >>= 15;
+	if (lvl < 0)
+		lvl = 0;
+	if (lvl > 32767)
+		lvl = 32767;
+	return (s16)lvl;
+}
+#endif /* FR_NO_WAVES */
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 4392617..2eee910 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,36 +1,41 @@
-ARG TARGETPLATFORM=linux/amd64
-FROM debian:bookworm-slim
+FROM --platform=linux/amd64 ubuntu:22.04
+
+ENV DEBIAN_FRONTEND=noninteractive
 
 # Cross-compilers for FR_Math size report.
-# Bare toolchains only — no IDE, no runtime, no extras.
-# Build with: docker build --platform linux/amd64 -t fr-math-sizes docker/
+# Mirrors the proven xelp crossbuild Dockerfile (github.com/deftio/xelp).
+# Build with: docker build -t fr-math-sizes docker/
 
+# Use libc6-dev-i386 instead of gcc-multilib to avoid conflicts
+# with cross-compiler packages on 22.04.
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    gcc g++ make bash binutils \
-    gcc-arm-none-eabi libnewlib-arm-none-eabi \
-    gcc-riscv64-unknown-elf \
-    gcc-12-m68k-linux-gnu \
+    build-essential \
+    gcc clang tcc \
+    libc6-dev-i386 \
+    lib32gcc-s1 \
+    gcc-aarch64-linux-gnu \
+    gcc-arm-none-eabi \
+    gcc-msp430 \
+    gcc-avr \
     gcc-m68hc1x \
+    gcc-m68k-linux-gnu \
+    gcc-powerpc-linux-gnu \
+    gcc-riscv64-linux-gnu \
+    gcc-riscv64-unknown-elf \
+    gcc-xtensa-lx106 \
+    picolibc-xtensa-lx106-elf \
+    gcc-mipsel-linux-gnu \
     sdcc \
+    binutils \
     wget ca-certificates xz-utils bzip2 \
     && rm -rf /var/lib/apt/lists/*
 
-# MSP430: TI's prebuilt msp430-elf-gcc (not in Debian repos).
-RUN wget -q https://dr-download.ti.com/software-development/ide-configuration-compiler-or-debugger/MD-LlCjWuAbzH/9.3.1.2/msp430-gcc-9.3.1.11_linux64.tar.bz2 \
-    -O /tmp/msp430-gcc.tar.bz2 \
-    && mkdir -p /opt/msp430-gcc \
-    && tar -xjf /tmp/msp430-gcc.tar.bz2 -C /opt/msp430-gcc --strip-components=1 \
-    && rm /tmp/msp430-gcc.tar.bz2
-
-ENV PATH="/opt/msp430-gcc/bin:${PATH}"
-
-# ESP32 (Xtensa): Espressif's prebuilt toolchain.
-RUN wget -q https://github.com/espressif/crosstool-NG/releases/download/esp-13.2.0_20240530/xtensa-esp-elf-13.2.0_20240530-x86_64-linux-gnu.tar.xz \
-    -O /tmp/xtensa-gcc.tar.xz \
-    && mkdir -p /opt/xtensa-gcc \
-    && tar -xJf /tmp/xtensa-gcc.tar.xz -C /opt/xtensa-gcc --strip-components=1 \
-    && rm /tmp/xtensa-gcc.tar.xz
-
-ENV PATH="/opt/xtensa-gcc/bin:${PATH}"
+# Espressif unified Xtensa toolchain (ESP32 LX6, ESP32-S2/S3 LX7).
+# Not in Ubuntu repos; prebuilt from Espressif's crosstool-NG releases.
+RUN wget -q https://github.com/espressif/crosstool-NG/releases/download/esp-15.2.0_20251204/xtensa-esp-elf-15.2.0_20251204-x86_64-linux-gnu.tar.xz \
+    -O /tmp/xtensa-esp.tar.xz \
+    && tar -xJf /tmp/xtensa-esp.tar.xz -C /opt \
+    && for f in /opt/xtensa-esp-elf/bin/xtensa-esp*; do ln -sf "$f" /usr/local/bin/; done \
+    && rm /tmp/xtensa-esp.tar.xz
 
 WORKDIR /src
diff --git a/docker/build_sizes_compare.sh b/docker/build_sizes_compare.sh
new file mode 100755
index 0000000..940de5f
--- /dev/null
+++ b/docker/build_sizes_compare.sh
@@ -0,0 +1,174 @@
+#!/usr/bin/env bash
+#
+# build_sizes_compare.sh — cross-compile FR_math.c with and without FR_tan32.c
+# for every supported target, and report the size delta.
+#
+# Run inside the Docker container:
+#   docker run --rm -v $(pwd):/src fr-math-sizes bash /src/docker/build_sizes_compare.sh
+
+set -euo pipefail
+
+SRC_OLD="/src/src/FR_math.c"
+SRC_NEW="/src/src/FR_tan32.c"
+INC="-I/src/src"
+OUT="/src/build/size_compare"
+
+mkdir -p "${OUT}"
+
+# ── helpers ────────────────────────────────────────────────────────────
+
+# get_text_size <label> <compiler> <size-tool> <flags> <sources...>
+# Compiles each source to ${OUT}/<label>_<name>.o and echoes the summed text size ("n/a": compiler not found, "fail": compile error).
+get_text_size() {
+    local label="$1"; shift
+    local cc="$1"; shift
+    local sz_cmd="$1"; shift
+    local flags="$1"; shift
+    # remaining args ("$@") are the .c source files to compile
+
+    if ! command -v "${cc}" >/dev/null 2>&1; then
+        echo "n/a"
+        return
+    fi
+
+    local total=0
+    for src in "$@"; do
+        local bname
+        bname=$(basename "${src}" .c)
+        local obj="${OUT}/${label}_${bname}.o"
+        if ! ${cc} ${flags} ${INC} -std=c99 -Wall -Os -ffreestanding \
+           -c "${src}" -o "${obj}" 2>/dev/null; then
+            echo "fail"
+            return
+        fi
+        local text
+        text=$(${sz_cmd} --format=berkeley "${obj}" 2>/dev/null | tail -1 | awk '{print $1}')
+        total=$((total + text))
+    done
+    echo "${total}"
+}
+
+# resolve_size_tool <compiler>: echo "<prefix>-size" when the compiler name ends in "-gcc*" and that tool is on PATH, otherwise plain "size".
+resolve_size_tool() {
+    local compiler="$1"
+    local stem="${compiler%-gcc*}"
+    stem="${stem%-gcc-*}"
+    if [[ "${stem}" == "${compiler}" ]] || ! command -v "${stem}-size" >/dev/null 2>&1; then
+        echo "size"
+    else
+        echo "${stem}-size"
+    fi
+}
+
+# ── target definitions ────────────────────────────────────────────────
+
+declare -a T_NAMES T_CCS T_SZ T_FLAGS
+
+# add <name> <compiler> <flags>: append one target to the parallel T_* arrays.
+add() {
+    local name="$1" compiler="$2" cflags="$3"
+    T_NAMES+=("${name}"); T_CCS+=("${compiler}"); T_FLAGS+=("${cflags}")
+    T_SZ+=("$(resolve_size_tool "${compiler}")")
+}
+
+# ARM
+add "RP2040 (Cortex-M0+)"   arm-none-eabi-gcc  "-mcpu=cortex-m0plus -mthumb"
+add "STM32 (Cortex-M4)"     arm-none-eabi-gcc  "-mcpu=cortex-m4 -mthumb -mfloat-abi=soft"
+add "Cortex-M0 (Thumb-1)"   arm-none-eabi-gcc  "-mcpu=cortex-m0 -mthumb"
+
+# RISC-V
+add "RISC-V 32 (rv32im)"    riscv64-unknown-elf-gcc  "-march=rv32im -mabi=ilp32"
+
+# Xtensa (ESP32)
+add "ESP32 (Xtensa)"        xtensa-esp-elf-gcc  ""
+
+# 68k
+add "68k"                    m68k-linux-gnu-gcc-12  ""
+
+# x86
+add "x86-32"                gcc  "-m32"
+add "x86-64"                gcc  "-m64"
+
+# MSP430 (16-bit, no stdint)
+add "MSP430"                msp430-elf-gcc  "-mmcu=msp430f5529 -DFR_NO_STDINT"
+
+# 68HC11 (8-bit)
+add "68HC11"                m68hc11-gcc  "-DFR_NO_STDINT"
+
+# ── compile ────────────────────────────────────────────────────────────
+
+echo ""
+echo "FR_Math cross-platform size comparison: OLD vs OLD+NEW tan32"
+echo "Date: $(date -u '+%Y-%m-%d %H:%M UTC')"
+echo ""
+
+declare -a R_OLD R_NEW
+
+for i in "${!T_NAMES[@]}"; do
+    label="${T_NAMES[$i]}"
+    cc="${T_CCS[$i]}"
+    sz="${T_SZ[$i]}"
+    flags="${T_FLAGS[$i]}"
+
+    tag=$(echo "${label}" | tr ' ()/' '____')
+
+    old=$(get_text_size "${tag}_old" "${cc}" "${sz}" "${flags}" "${SRC_OLD}")
+    new=$(get_text_size "${tag}_new" "${cc}" "${sz}" "${flags}" "${SRC_OLD}" "${SRC_NEW}")
+
+    R_OLD+=("${old}")
+    R_NEW+=("${new}")
+
+    echo "  ${label}: old=${old}  old+new=${new}"
+done
+
+# ── output table ───────────────────────────────────────────────────────
+
+echo ""
+echo "## FR_Math size: Old vs Old + 32-bit LUT tan (\`-Os -ffreestanding\`)"
+echo ""
+printf "| %-26s | %10s | %10s | %10s | %6s |\n" "Target" "Old (text)" "w/ tan32" "Delta" "Delta%"
+printf "| %-26s | %10s | %10s | %10s | %6s |\n" "--------------------------" "----------" "----------" "----------" "------"
+
+for i in "${!T_NAMES[@]}"; do
+    old="${R_OLD[$i]}"
+    new="${R_NEW[$i]}"
+
+    if [[ "${old}" =~ ^[0-9]+$ ]] && [[ "${new}" =~ ^[0-9]+$ ]]; then
+        delta=$((new - old))
+        pct=$(awk "BEGIN { printf \"%.1f\", 100.0*${delta}/${old} }")
+        printf "| %-26s | %8s B | %8s B | %+8d B | %5s%% |\n" \
+            "${T_NAMES[$i]}" "${old}" "${new}" "${delta}" "${pct}"
+    else
+        printf "| %-26s | %10s | %10s | %10s | %6s |\n" \
+            "${T_NAMES[$i]}" "${old}" "${new}" "—" "—"
+    fi
+done
+
+echo ""
+echo "Old = FR_math.c only (contains existing tan/atan)."
+echo "w/ tan32 = FR_math.c + FR_tan32.c (adds new 32-bit LUT tan/atan alongside old)."
+echo "Delta = additional bytes from FR_tan32.c (new functions + 129-entry u32 table)."
+echo ""
+
+# ── per-function breakdown (x86-64) ───────────────────────────────────
+
+echo "### Per-function breakdown (x86-64, GCC -Os)"
+echo ""
+# Objects are named "<tag>_<old|new>_<source>.o"; the tag keeps the '-' of "x86-64" (tr above only maps ' ()/').
+obj_old="${OUT}/x86-64_old_FR_math.o"
+obj_new="${OUT}/x86-64_new_FR_tan32.o"
+
+if [[ -f "${obj_old}" ]] && [[ -f "${obj_new}" ]]; then
+    echo "**Old tan/atan in FR_math.o:**"
+    echo '```'
+    nm "${obj_old}" -n -S --size-sort -f sysv -t d 2>/dev/null | grep -iE "tan|atan" || true
+    echo '```'
+    echo ""
+    echo "**New in FR_tan32.o:**"
+    echo '```'
+    nm "${obj_new}" -n -S --size-sort -f sysv -t d 2>/dev/null | grep -E "FUNC" || true
+    echo '```'
+fi
+
+echo ""
+echo "Done."
diff --git a/docker/size_detail.sh b/docker/size_detail.sh
new file mode 100755
index 0000000..19677e5
--- /dev/null
+++ b/docker/size_detail.sh
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+INC="-I/src/src"
+FLAGS="-std=c99 -Wall -Os -ffreestanding"
+OUT=/tmp/sz
+mkdir -p "${OUT}"
+
+# do_platform <label> <compiler> <target-flags>: print one Markdown table row for a target.
+# Returns 0 without printing when the compiler is missing or either source fails to compile.
+do_platform() {
+    local label="$1" cc="$2" flags="$3"
+
+    if ! command -v "${cc}" >/dev/null 2>&1; then
+        return 0
+    fi
+
+    # Prefer <prefix>-size / <prefix>-nm when the compiler is <prefix>-gcc*; otherwise use the host tools.
+    local sz_cmd="size" nm_cmd="nm"
+    local prefix="${cc%-gcc*}"
+    if [ "${prefix}" != "${cc}" ]; then
+        command -v "${prefix}-size" >/dev/null 2>&1 && sz_cmd="${prefix}-size"
+        command -v "${prefix}-nm" >/dev/null 2>&1 && nm_cmd="${prefix}-nm"
+    fi
+
+    # Skip this target on a compile error. The explicit 0 matters: a bare `return` would hand the
+    # compiler's non-zero status to the caller, and `set -e` would then abort the whole script.
+    ${cc} ${FLAGS} ${flags} ${INC} -c /src/src/FR_math.c  -o "${OUT}/old.o" 2>/dev/null || return 0
+    ${cc} ${FLAGS} ${flags} ${INC} -c /src/src/FR_tan32.c -o "${OUT}/new.o" 2>/dev/null || return 0
+
+    local old_text new_text
+    old_text=$(${sz_cmd} --format=berkeley "${OUT}/old.o" | tail -1 | awk '{print $1}')
+    new_text=$(${sz_cmd} --format=berkeley "${OUT}/new.o" | tail -1 | awk '{print $1}')
+
+    # Sum the sizes (hex, as printed by nm -S) of text symbols in old.o whose name contains "tan".
+    local old_tan_total=0 dec_size addr size typ name
+    while IFS=' ' read -r addr size typ name; do
+        if [ -n "${size}" ]; then
+            dec_size=$((16#${size}))
+            old_tan_total=$((old_tan_total + dec_size))
+        fi
+    done < <(${nm_cmd} -n -S --defined-only "${OUT}/old.o" 2>/dev/null \
+             | grep -E " [tT] " | grep -iE "tan|atan" || true)
+
+    local replace_delta=$((new_text - old_tan_total))
+    local new_total=$((old_text - old_tan_total + new_text))
+
+    printf "| %-26s | %6s | %6s | %6s | %6s | %+6d |\n" \
+        "${label}" "${old_text}" "${old_tan_total}" "${new_text}" "${new_total}" "${replace_delta}"
+
+    rm -f "${OUT}/old.o" "${OUT}/new.o"
+}
+
+echo ""
+echo "## FR_Math: Old vs Replacement size (new tan32 replaces old tan/atan)"
+echo ""
+printf "| %-26s | %6s | %6s | %6s | %6s | %6s |\n" \
+    "Target" "Old" "OldT/A" "New" "Repl" "Delta"
+printf "| %-26s | %6s | %6s | %6s | %6s | %6s |\n" \
+    "--------------------------" "------" "------" "------" "------" "------"
+
+do_platform "RP2040 (Cortex-M0+)"     arm-none-eabi-gcc      "-mcpu=cortex-m0plus -mthumb"
+do_platform "STM32 (Cortex-M4)"       arm-none-eabi-gcc      "-mcpu=cortex-m4 -mthumb -mfloat-abi=soft"
+do_platform "Cortex-M0 (Thumb-1)"     arm-none-eabi-gcc      "-mcpu=cortex-m0 -mthumb"
+do_platform "RISC-V 32 (rv32im)"      riscv64-unknown-elf-gcc "-march=rv32im -mabi=ilp32"
+do_platform "ESP32 (Xtensa)"          xtensa-esp-elf-gcc     ""
+do_platform "68k"                      m68k-linux-gnu-gcc-12  ""
+do_platform "x86-32"                   gcc                    "-m32"
+do_platform "x86-64"                   gcc                    "-m64"
+do_platform "MSP430"                   msp430-elf-gcc         "-mmcu=msp430f5529 -DFR_NO_STDINT"
+
+echo ""
+echo "Old     = FR_math.c total .text"
+echo "OldT/A  = old tan+atan functions within FR_math.o (would be removed)"
+echo "New     = FR_tan32.c total .text (replacement functions + 129-entry u32 table)"
+echo "Repl    = library size after replacement (Old - OldT/A + New)"
+echo "Delta   = New - OldT/A (net change from replacement)"
+
+# === x86-64 per-function detail ===
+echo ""
+echo "### x86-64 per-function detail"
+echo ""
+
+gcc ${FLAGS} -m64 ${INC} -c /src/src/FR_math.c  -o "${OUT}/old.o" 2>/dev/null
+gcc ${FLAGS} -m64 ${INC} -c /src/src/FR_tan32.c -o "${OUT}/new.o" 2>/dev/null
+
+echo "**Old tan/atan functions in FR_math.o:**"
+echo '```'
+nm -n -S --defined-only "${OUT}/old.o" | grep -E " [tT] " | grep -iE "tan|atan" | \
+while IFS=' ' read -r addr size typ name; do
+    printf "  %-30s %d bytes\n" "${name}" "$((16#${size}))"
+done
+echo '```'
+
+echo ""
+echo "**New functions in FR_tan32.o:**"
+echo '```'
+nm -n -S --defined-only "${OUT}/new.o" | grep -E " [tT] " | \
+while IFS=' ' read -r addr size typ name; do
+    printf "  %-30s %d bytes\n" "${name}" "$((16#${size}))"
+done
+echo '```'
diff --git a/docs/README.md b/docs/README.md
index 0e1dd1a..636de4a 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -16,7 +16,7 @@ into a single format.
   Tested on gcc, clang, MSVC, IAR, Keil, sdcc, AVR-gcc, MSP430-gcc,
   RISC-V toolchains, and Arduino.
 - Zero dependencies beyond `<stdint.h>`.
-- Parameterised radix: every function takes the binary point as an
+- Parameterized radix: every function takes the binary point as an
   argument, so you choose how many fractional bits you need per call.
 - Deterministic, bounded error — every public symbol has a documented
   worst case in the [API reference](api-reference.md).
@@ -33,7 +33,7 @@ or any tooling. If you want the browser version, look in
 | --- | --- |
 | [getting-started.md](getting-started.md) | Clone, build, run your first FR_Math program. |
 | [fixed-point-primer.md](fixed-point-primer.md) | Why fixed-point exists, sM.N notation, operations, how to pick a radix. |
-| [api-reference.md](api-reference.md) | Every public symbol: signature, radix, precision, error behaviour. |
+| [api-reference.md](api-reference.md) | Every public symbol: signature, radix, precision, error behavior. |
 | [examples.md](examples.md) | Runnable snippets: trig, log, waves, ADSR, 2D transforms. |
 | [building.md](building.md) | Makefile, scripts, test suite, coverage, cross-compilation. |
 | [releases.md](releases.md) | Release history with per-version highlights and breaking changes. |
@@ -42,27 +42,33 @@ or any tooling. If you want the browser version, look in
 
 Errors below are measured at Q16.16 (s15.16). All functions accept any
 radix — Q16.16 is just the reference point for the table. See the
-[TDD report](../build/test_tdd_report.md) for sweeps at radixes 8, 12,
-16, and 24. Percent errors skip expected values near zero (|expected| < 0.01).
+TDD report (run `make test-tdd` to generate `build/test_tdd_report.md`)
+for sweeps at radixes 8, 12, 16, and 24.
 
 <!-- ACCURACY_TABLE_START -->
-| Function | Max err (%) | Avg err (%) | Note |
+| Function | Max err (%)*| Avg err (%) | Note |
 |---|---:|---:|---|
-| sin / cos | 0.7169 | 0.0100 | 65536-pt sweep + specials |
-| tan | 0.7118 | 0.0162 | 65536-pt sweep (skip poles) |
-| asin / acos | 0.7025 | 0.0105 | 65536-pt; sqrt approx near boundary |
-| atan2 | 0.4953 | 0.0268 | 65536x5 radii; asin/acos+hypot_fast8 |
-| atan | 0.2985 | 0.0159 | 20001-pt sweep [-10,10]; via FR_atan2 |
-| sqrt | 0.0003 | 0.0000 | Round-to-nearest |
-| log2 | 0.2479 | 0.0045 | 65-entry mantissa table |
-| pow2 | 0.1373 | 0.0057 | 65-entry fraction table |
-| ln, log10 | 0.0015 | 0.0004 | Via FR_MULK28 from log2 |
-| exp | 0.0719 | 0.0051 | FR_MULK28 + FR_pow2 |
-| exp_fast | 0.0719 | 0.0064 | Shift-only scaling |
-| pow10 | 0.1163 | 0.0075 | FR_MULK28 + FR_pow2 |
-| pow10_fast | 0.1163 | 0.0100 | Shift-only scaling |
-| hypot (exact) | 0.0001 | 0.0000 | 64-bit intermediate |
-| hypot_fast8 (8-seg) | 0.0977 | 0.0508 | Shift-only, no multiply |
+| sin/cos (BAM) | 0.1526 | 0.0030 | very fast binary angle trig |
+| sin/cos (deg) | 0.1526 | 0.0029 | degree input trig fns |
+| sin/cos (rad) | 0.1828 | 0.0033 | radian (traditional) trig |
+| tan (BAM) | 0.5823 | 0.0008 | binary angle tangent; ±maxint at poles |
+| tan (deg) | 0.5311 | 0.0008 | degree input tangent; saturated at poles |
+| tan (rad) | 0.0386 | 0.0001 | radian (traditional) tangent |
+| asin / acos | 0.7771 | 0.0280 | reverse trig, radian output |
+| atan2 | 0.2564 | 0.0237 | reverse tangent, always safe |
+| atan | 0.2425 | 0.0155 | reverse tangent, accepts up to maxint |
+| sqrt | 0.0000 | 0.0000 | Round-to-nearest |
+| log2 | 0.0116 | 0.0016 | shift/add only for speed |
+| pow2 | 0.0018 | 0.0004 | shift/add only for speed |
+| ln, log10 | 0.0004 | 0.0000 | shift/add only for speed |
+| exp | 0.0003 | 0.0000 | shift/add only for speed |
+| exp_fast | 0.0009 | 0.0001 | Shift-only scaling |
+| pow10 | 0.0005 | 0.0000 | shift/add only for speed |
+| pow10_fast | 0.0022 | 0.0002 | Shift-only scaling |
+| hypot (exact) | 0.0000 | 0.0000 | Uses 64-bit intermediate |
+| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply |
+
+*Relative error; reference clamped to 1% of full-scale output.
 <!-- ACCURACY_TABLE_END -->
 
 ## What's in the box
@@ -71,8 +77,8 @@ radix — Q16.16 is just the reference point for the table. See the
 | --- | --- |
 | Arithmetic | `FR_ADD`, `FR_SUB`, `FR_DIV`, `FR_DIV32`, `FR_MOD`, `FR_FixMuls`, `FR_FixMulSat`, `FR_CHRDX` |
 | Utility | `FR_MIN`, `FR_MAX`, `FR_CLAMP`, `FR_ABS`, `FR_SGN` |
-| Trig (integer deg) | `FR_Sin`, `FR_Cos`, `FR_Tan`, `FR_SinI`, `FR_CosI`, `FR_TanI` |
-| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_sin_deg`, `fr_cos_deg` |
+| Trig (degree) | `fr_sin_deg`, `fr_cos_deg`, `fr_tan_deg`, `FR_SinI`, `FR_CosI`, `FR_TanI` |
+| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam` |
 | Inverse trig | `FR_atan`, `FR_atan2`, `FR_asin`, `FR_acos` |
 | Log / exp | `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`, `FR_EXP_FAST`, `FR_POW10_FAST`, `FR_MULK28` |
 | Roots | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` |
@@ -118,7 +124,7 @@ pays off:
 
 - **8- and 16-bit MCUs** (AVR, MSP430, 8051, sdcc) where the FPU does
   not exist and even software float is too slow or too large.
-- **Hot inner loops on any CPU** where a parameterised-radix integer
+- **Hot inner loops on any CPU** where a parameterized-radix integer
   multiply is faster and more deterministic than a `float`. Think DSP
   taps, PID loops, coordinate transforms inside a scanline renderer.
 - **Bit-exact reproducibility** across compilers, architectures, and
@@ -163,18 +169,23 @@ s32 two  = I2FR(2, R);              /* 2.0 → raw 131072              */
  *
  * MixedCase FR_ names are functions — they contain loops, tables, or
  * multi-step algorithms where inlining would waste ROM:
- *   FR_Cos, FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ...
+ *   FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ...
  *
- * lowercase fr_ names are v2 functions (radian trig, wave generators,
- * ADSR envelopes):
- *   fr_sin, fr_cos, fr_tan, fr_wave_tri, fr_adsr_step ...
+ * lowercase fr_ names are v2 functions (degree/radian/BAM trig, wave
+ * generators, ADSR envelopes):
+ *   fr_sin_deg, fr_cos_deg, fr_tan_deg, fr_sin, fr_cos, fr_tan,
+ *   fr_wave_tri, fr_adsr_step ...
+ *
+ * Legacy aliases: FR_Cos, FR_Sin, FR_Tan still work — they are
+ * macros that map to fr_cos_deg, fr_sin_deg, fr_tan_deg.  New code
+ * should use the fr_ names directly.
  *
  * Some macros wrap functions: FR_EXP(x,r) scales x then calls
  * FR_pow2 — one-liner convenience, heavy lifting in the function.
  */
 
 /* ---- Math functions ---- */
-s32 c45   = FR_Cos(45, 0);                /* cos(45°) = 0.7071       */
+s32 c45   = fr_cos_deg(45, 0);            /* cos(45°) = 0.7071       */
 s32 s30   = fr_sin(FR_numstr("0.5236", R), R); /* sin(0.5236 rad)    */
 s32 root2 = FR_sqrt(two, R);              /* sqrt(2)  = 1.4142       */
 s32 angle = FR_atan2(I2FR(1,R), I2FR(1,R), R); /* atan2(1,1) rad     */
@@ -210,23 +221,23 @@ understand *how* the radix notation works first.
 | Multiply-free option | No | No | Yes (e.g. `FR_EXP_FAST`, `FR_hypot_fast8`) |
 | Wave generators | No | No | 6 shapes + ADSR |
 | Dependencies | None | ARM only | None |
-| Code size (Cortex-M0, -Os) | 2.4 KB | ~40 KB+ | 4.2 KB |
+| Code size (Cortex-M0, -Os) | 2.4 KB | ~40 KB+ | 3.4 KB lean / 5.7 KB full |
 
 Sizes measured with `arm-none-eabi-gcc -mcpu=cortex-m0 -mthumb -Os`.
 libfixmath covers trig/sqrt/exp in Q16.16 only; FR_Math includes
 log/ln/log10, wave generators, ADSR, print helpers, and variable radix.
 CMSIS-DSP estimate is for the math function subset only.
-See [`docker/build_sizes.sh`](../docker/build_sizes.sh) for the build
-script.
+See [`scripts/crossbuild_sizes.sh`](../scripts/crossbuild_sizes.sh) for
+the build script.
 
 ## History
 
 FR_Math has been in service since **2000**, originally built for
 graphics transforms on 16 MHz 68k Palm Pilots (it shipped inside
 Trumpetsoft's *Inkstorm*), then ported forward to ARM, x86, MIPS,
-RISC-V, and various 8/16-bit embedded targets. v2.0.7 is the current
-release with a full test suite, bit-exact numerical
-specification, and CI on every push.
+RISC-V, and various 8/16-bit embedded targets. The current release
+has a full test suite, bit-exact numerical specification, and CI on
+every push.
 
 ## License
 
diff --git a/docs/api-reference.md b/docs/api-reference.md
index 3f97f20..cf1cec5 100644
--- a/docs/api-reference.md
+++ b/docs/api-reference.md
@@ -1,7 +1,7 @@
 # API Reference
 
 Every public symbol, grouped by topic. Each entry lists the radix
-convention, the precision, and the error / saturation behaviour. All
+convention, the precision, and the error / saturation behavior. All
 types are from `FR_defs.h`: `s8 s16 s32 s64` for
 signed and `u8 u16 u32 u64` for unsigned integers (these are
 aliases for the `<stdint.h>` types).
@@ -12,7 +12,7 @@ Most entries list **inputs**, **output**,
 **radix handling** and **precision**
 separately, because in a mixed-radix library those four things are
 what actually lets you plan an arithmetic pipeline without hidden
-quantisation. If you are new to fixed-point, the
+quantization. If you are new to fixed-point, the
 [Fixed-Point Primer](fixed-point-primer.md) explains the
 notation first; come back here once you're comfortable reading
 `s15.16` and `s0.15`.
@@ -79,6 +79,8 @@ and in return get float-like ergonomics with integer-only codegen.
 | `FR_OVERFLOW_POS` | `0x7FFFFFFF` (`INT32_MAX`) | Saturating ops when the true result exceeds `+2^31`. |
 | `FR_OVERFLOW_NEG` | `0x80000000` (`INT32_MIN`) | Saturating ops when the true result is below `−2^31`. |
 | `FR_DOMAIN_ERROR` | `0x80000000` (`INT32_MIN`) | Functions with an invalid input, e.g. `FR_sqrt(-1)`, `FR_log2(0)`, `FR_asin(2.0)`. **Shares the bit pattern of `FR_OVERFLOW_NEG`**, so don't mix a `≤ FR_OVERFLOW_NEG` check with a domain check — test for the exact sentinel. |
+| `FR_TRIG_MAXVAL` | `0x7FFFFFFF` (`INT32_MAX`) | Tangent saturation ceiling. Returned by `fr_tan_bam`, `fr_tan`, `fr_tan_deg`, and `FR_TanI` when the angle is near a pole (90° + k·180°). |
+| `FR_TRIG_MINVAL` | `-FR_TRIG_MAXVAL` | Tangent saturation floor. Negative-side pole saturation. |
 
 ### Common numerical constants (`FR_math.h`)
 
@@ -118,7 +120,7 @@ so call sites read as intent:
 | --- | --- | --- | --- |
 | `I2FR(i, r)` | `i`: integer; `r`: target radix in bits | `s32` at radix `r` | `(i) << (r)`. No bounds check. Use when you know `|i|` fits in `32 − r` signed bits. |
 | `FR2I(x, r)` | `x`: fixed-point at radix `r` | integer | `(x) >> (r)`. Truncates toward **−∞** (C's signed shift). `FR2I(-1, 4) == -1`, not 0. |
-| `FR_INT(x, r)` | `x`: fixed-point at radix `r` | integer | Truncates toward **zero**. `FR_INT(-1, 4) == 0`. Useful when you want C's normal integer-cast behaviour. |
+| `FR_INT(x, r)` | `x`: fixed-point at radix `r` | integer | Truncates toward **zero**. `FR_INT(-1, 4) == 0`. Useful when you want C's normal integer-cast behavior. |
 | `FR_NUM(i, f, d, r)` | `i`: integer part; `f`: decimal fraction digits; `d`: number of digits in `f`; `r`: target radix | `s32` at radix `r` | Build a fixed-point literal from decimal. `FR_NUM(12, 34, 2, 10)` is 12.34 at s.10. Rounds toward zero; for round-to-nearest, add half an LSB at the call site. |
 | `FR_numstr(s, r)` | `s`: null-terminated decimal string (e.g. `"3.14159"`); `r`: target radix | `s32` at radix `r` | Runtime string-to-fixed-point parser (inverse of `FR_printNumF`). Handles signs, leading whitespace, and leading-zero fractions like `"0.05"`. Up to 9 fractional digits. No malloc, no strtod, no libm. Returns 0 for NULL or empty input. |
 | `FR2D(x, r)` | `x`: fixed-point at radix `r` | `double` | Debug-only: `x / (double)(1 << r)`. Pulls in `libm` — compile it out of release builds. |
@@ -157,13 +159,13 @@ so call sites read as intent:
 | `FR_MAX(a, b)` | Two values of the same type | The larger of the two | Evaluates each argument once. |
 | `FR_CLAMP(x, lo, hi)` | `x`: value; `lo`, `hi`: bounds | `x` clamped to `[lo, hi]` | Equivalent to `FR_MIN(FR_MAX(x, lo), hi)`. |
 | `FR_DIV(x, xr, y, yr)` | `x`: numerator at radix `xr`; `y`: denominator at radix `yr` | `s32` at radix `xr` | Pre-scales the numerator in a 64-bit intermediate and **rounds to nearest** (adds half the divisor before truncating, with correct sign handling). Worst-case error ≤ 0.5 LSB. Works correctly across the full Q16.16 range. |
-| `FR_DIV_TRUNC(x, xr, y, yr)` | same as `FR_DIV` | `s32` at radix `xr` | `((s64)(x) << (yr)) / (s32)(y)`. Truncating division (rounds toward zero). This was the behaviour of `FR_DIV` in v2.0.0; use it when you need exact backward compatibility or when the truncation bias is acceptable. |
+| `FR_DIV_TRUNC(x, xr, y, yr)` | same as `FR_DIV` | `s32` at radix `xr` | `((s64)(x) << (yr)) / (s32)(y)`. Truncating division (rounds toward zero). This was the behavior of `FR_DIV` in v2.0.0; use it when you need exact backward compatibility or when the truncation bias is acceptable. |
 | `FR_DIV32(x, xr, y, yr)` | same as `FR_DIV` | `s32` at radix `xr` | `((s32)(x) << (yr)) / (s32)(y)`. 32-bit-only truncating path — requires `|x| < 2^(31 − yr)` to avoid overflow in the intermediate shift. Use on tiny targets (PIC, AVR, 8051) where 64-bit ops pull in unwanted compiler runtime code. |
 | `FR_MOD(x, y)` | `x`, `y`: same radix | remainder at the same radix | `(x) % (y)`. Standard C remainder semantics. |
 
 ## Arithmetic
 
-FR_Math splits arithmetic into three flavours. The
+FR_Math splits arithmetic into three flavors. The
 **macros** (`FR_ADD`, `FR_SUB`)
 are mixed-radix, inline, and wrap on overflow. The **s.16
 helper functions** (`FR_FixMuls`,
@@ -291,7 +293,7 @@ bits = 16. Going wider would only add noise, not precision.
 
 "But what if I want to pass in any signed angle without worrying
 about conversion?" That is exactly what `FR_CosI(deg)`,
-`FR_Cos(deg, radix)`, and `fr_cos(rad, radix)` are for. All three
+`fr_cos_deg(deg, radix)`, and `fr_cos(rad, radix)` are for. All three
 take *signed* inputs and reduce them to BAM for you. The only place
 you actually see a `u16` is at the internal `fr_cos_bam` /
 `fr_sin_bam` boundary, which you only call by hand if you *want*
@@ -346,7 +348,7 @@ Four shifts plus three adds — cheap on an 8051, AVR, or any
 hand-written DSP inner loop — and the answer has at most
 ±0.5 LSB of truncation error. The same discipline applies to
 the other direction: in `FR_DEG2BAM` the divide-by-360 is
-a compile-time constant, so any optimising compiler folds it into a
+a compile-time constant, so any optimizing compiler folds it into a
 multiply-by-reciprocal (or, on a weaker toolchain, a runtime call
 that you can inline yourself).
 
@@ -396,6 +398,7 @@ represents exactly 1.0 in the s15.16 output format.
 | --- | --- | --- |
 | `fr_cos_bam` | `s32 fr_cos_bam(u16 bam)` | s15.16, range [−65536, +65536]. Exact at cardinal angles. |
 | `fr_sin_bam` | `s32 fr_sin_bam(u16 bam)` | s15.16. Defined as `fr_cos_bam(bam − FR_BAM_QUADRANT)`. |
+| `fr_tan_bam` | `s32 fr_tan_bam(u16 bam)` | s15.16. Uses a 65-entry octant table for [0, 45°] and the reciprocal identity `tan(x) = 1/tan(90°−x)` for (45°, 90°). Saturates to `±FR_TRIG_MAXVAL` at the poles (90°, 270°). Returns exact 0 at 0° and 180°. No 64-bit intermediates; one 32-bit division only in the >45° path. |
 
 ### Radian-native
 
@@ -405,35 +408,35 @@ represents exactly 1.0 in the s15.16 output format.
 | `fr_sin` | `s32 fr_sin(s32 rad, u16 radix)` | Same convention. |
 | `fr_tan` | `s32 fr_tan(s32 rad, u16 radix)` | Returns at **radix 16** (`FR_TRIG_OUT_PREC`). Computed as `(sin << 16) / cos`; saturates to `±INT32_MAX` (`FR_TRIG_MAXVAL`) near π/2 + kπ where cos → 0. |
 
-### Integer-degree wrappers (legacy API)
+### Degree wrappers (current and legacy)
 
-The uppercase legacy API takes an angle in degrees.
-`FR_SinI`, `FR_CosI` and `FR_TanI`
-take plain integer degrees — the trailing *I* denotes
-*integer*. The variants *without* the `I`
-suffix (`FR_Sin`, `FR_Cos`, `FR_Tan`)
-accept a `radix` argument and treat the degree value as
-*fixed-point*, so you can pass fractional degrees like
-42.375°.
+The primary degree-based API uses lowercase `fr_` names.
+These are functions (not macros) that take a degree value as
+fixed-point at a caller-chosen radix:
 
-| Symbol | Signature | Kind |
+| Function | Signature | Notes |
 | --- | --- | --- |
-| `FR_SinI` | `FR_SinI(deg)` → `s32` (s15.16) | Macro: `fr_sin_bam(FR_DEG2BAM(deg))`. Zero-cost inline. |
-| `FR_CosI` | `FR_CosI(deg)` → `s32` (s15.16) | Macro: `fr_cos_bam(FR_DEG2BAM(deg))`. |
-| `FR_TanI` | `s32 FR_TanI(s16 deg)` | Function. Returns at radix 16; saturates to `±INT32_MAX` near 90° / 270°. |
-| `FR_Sin` | `s32 FR_Sin(s16 deg, u16 radix)` | `deg` is fixed-point at `radix`. Returns s15.16. |
-| `FR_Cos` | `s32 FR_Cos(s16 deg, u16 radix)` | Same. |
-| `FR_Tan` | `s32 FR_Tan(s16 deg, u16 radix)` | Returns at radix 16; saturates to `±INT32_MAX` near 90° / 270°. |
+| `fr_sin_deg` | `s32 fr_sin_deg(s32 deg, u16 radix)` | `deg` is fixed-point degrees at `radix`. Returns s15.16. |
+| `fr_cos_deg` | `s32 fr_cos_deg(s32 deg, u16 radix)` | Same. |
+| `fr_tan_deg` | `s32 fr_tan_deg(s32 deg, u16 radix)` | Returns at radix 16; saturates to `±INT32_MAX` near 90° / 270°. |
 
-### Degree wrappers on the BAM path
+Pass `radix = 0` for plain integer degrees, or a higher radix
+for fractional degrees (e.g. 42.375° at radix 4).
 
-If you're using the lowercase family and want to skip the
-radix entirely, two convenience macros cover pure integer degrees:
+**Integer-degree wrappers** (`FR_SinI`, `FR_CosI`, `FR_TanI`)
+take plain integer degrees — the trailing *I* denotes
+*integer*. These remain unchanged:
 
-| Macro | Expansion |
-| --- | --- |
-| `fr_cos_deg(deg)` | `fr_cos_bam(FR_DEG2BAM(deg))` |
-| `fr_sin_deg(deg)` | `fr_sin_bam(FR_DEG2BAM(deg))` |
+| Symbol | Signature | Kind |
+| --- | --- | --- |
+| `FR_SinI` | `FR_SinI(deg)` -> `s32` (s15.16) | Macro: `fr_sin_bam(FR_DEG2BAM(deg))`. Zero-cost inline. |
+| `FR_CosI` | `FR_CosI(deg)` -> `s32` (s15.16) | Macro: `fr_cos_bam(FR_DEG2BAM(deg))`. |
+| `FR_TanI` | `s32 FR_TanI(s16 deg)` | Function. Returns at radix 16; saturates to `±INT32_MAX` near 90° / 270°. |
+
+**Legacy aliases.** The uppercase `FR_Sin`, `FR_Cos`, and
+`FR_Tan` macros still work -- they map directly to
+`fr_sin_deg`, `fr_cos_deg`, and `fr_tan_deg` respectively.
+New code should use the `fr_` names.
 
 ## Inverse trigonometry
 
diff --git a/docs/building.md b/docs/building.md
index c9f5f21..db8d192 100644
--- a/docs/building.md
+++ b/docs/building.md
@@ -14,9 +14,10 @@ FR_Math has no dependencies beyond a C99 compiler and
 - Optional: `lcov` / `gcov` for coverage
   reports.
 
-There is no Autotools, no CMake, no Ninja, no package-manager
-integration. The library is small enough that the Makefile fits on
-one screen.
+There is no Autotools and no Ninja. The primary build system is a
+single Makefile. A `CMakeLists.txt` exists for ESP-IDF integration
+only — it is not a general-purpose CMake build. Packaged releases
+are published to the PlatformIO and Espressif component registries.
 
 ## Makefile targets
 
@@ -90,28 +91,27 @@ See `release_management.md` for the full step-by-step reference.
 
 ## The test suite
 
-Tests live under `tests/` and are split into six
+Tests live under `tests/` and are split into seven
 binaries to keep compile times low:
 
 | Binary | What it checks |
 | --- | --- |
-| `test_basic` | Radix conversions, `FR_ADD`, `FR_FixMuls`, rounding. |
-| `test_trig` | Integer-degree trig (`FR_Sin` et al.). |
-| `test_trig_radians` | Radian / BAM trig and the v2 `fr_sin` API. |
-| `test_log_exp` | Log base 2 / ln / log10 and their inverses. |
+| `fr_test` | Radix conversions, `FR_ADD`, `FR_FixMuls`, rounding (legacy harness). |
+| `test_comprehensive` | Trig (degree, radian, BAM), log/exp, sqrt, hypot. |
 | `test_2d` | 2D transforms, determinants, inverses. |
-| `test_full_coverage` | Dark-corner cases: overflow sentinels, edge radixes, round-trips. |
-| `test_tdd` | Characterisation tests pinned to bit-exact reference values. |
+| `test_overflow` | Overflow sentinels, saturation, edge radixes. |
+| `test_full` | Full-coverage dark-corner cases and round-trips. |
+| `test_2d_complete` | Extended 2D: matrix composition, inverse, point transforms. |
+| `test_tdd` | Characterization tests pinned to bit-exact reference values. |
 
-As of v2.0.0 the suite contains **42 tests** across
-those binaries and covers **99%** of the library source.
+The suite covers **99%** of the library source.
 Every public symbol is exercised at least once.
 
 ### Running a single binary
 
 ```bash
-make build/test_basic
-./build/test_basic
+make test-comprehensive
+./build/test_comprehensive
 
 # or all of them at once
 make test
@@ -119,12 +119,12 @@ make test
 
 ### Running the TDD pins after a change
 
-`test_tdd.cpp` is a characterisation suite. It records
+`test_tdd.cpp` is a characterization suite. It records
 exact bit patterns for a sample of inputs and fails loudly if those
-patterns drift. Any change that modifies the numerical behaviour of
+patterns drift. Any change that modifies the numerical behavior of
 the library will break this suite — that's the point.
 
-If you *intended* to change the numerical behaviour (e.g.
+If you *intended* to change the numerical behavior (e.g.
 you improved a polynomial approximation), update the pinned values in
 `tests/test_tdd.cpp` and note the change in
 `release_notes.md` along with any updates to the
@@ -155,44 +155,60 @@ you do *not* need `libm`.
 | Motorola 68k | `m68k-linux-gnu-gcc` | Docker. |
 | Motorola 68HC11 | `m68hc11-gcc` | Docker. |
 | PowerPC | `powerpc-linux-gnu-gcc` | Docker. |
+| MIPS32 | `mipsel-linux-gnu-gcc` | Docker. |
 | Xtensa LX106 (ESP8266) | `xtensa-lx106-elf-gcc` | Docker. |
+| Xtensa LX7 (ESP32-S3) | `xtensa-esp-elf-gcc` | Docker (Espressif toolchain). |
 | 8051 | `sdcc` | Manual. |
 
 ### Code size (.text section, compiled with `-Os`)
 
-Sizes are for `FR_math.c` compiled with `-Os -ffreestanding`.
-Core = compiled with `-DFR_CORE_ONLY` (math only, no print, no waves).
+Sizes are for `FR_math.c` compiled with `-Os`.
+Lean = `-DFR_LEAN -DFR_NO_PRINT` (radian trig, inv trig, log/exp, sqrt).
+Core = `-DFR_CORE_ONLY` (+ degree trig, BAM tan, log10, hypot).
+Full = all features (+ print, waves, ADSR).
 With `-ffunction-sections` and linker `--gc-sections`, only the
 functions your application references are linked, so real flash
 usage will be smaller.
 
 <!-- SIZE_TABLE_START -->
-| Target | Core | Full |
-|--------|-----:|-----:|
-| RP2040 (Cortex-M0+) | 2.6 KB | 4.2 KB |
-| STM32 (Cortex-M4) | 2.6 KB | 4.2 KB |
-| RISC-V 32 (rv32imac) | 3.0 KB | 4.7 KB |
-| ESP32 (Xtensa) | 3.5 KB | 5.2 KB |
-| 68k | 3.5 KB | 5.3 KB |
-| x86-64 (GCC) | 3.5 KB | 5.7 KB |
-| x86-32 | 4.5 KB | 6.8 KB |
-| MSP430 (16-bit) | 5.9 KB | 8.9 KB |
-| 68HC11 | 10.8 KB | 16.0 KB |
-| AVR (ATmega328P) | 7.0 KB | 10.6 KB |
+| Target | Lean | Core | Full |
+|--------|-----:|-----:|-----:|
+| Xtensa LX7 (ESP32-S3) | 2.9 KB | 4.2 KB | 5.3 KB |
+| Cortex-M4 (STM32) | 3.3 KB | 4.4 KB | 5.5 KB |
+| Cortex-M0 (RP2040) | 3.4 KB | 4.5 KB | 5.7 KB |
+| ARM Thumb | 3.4 KB | 4.7 KB | 5.9 KB |
+| RISC-V rv64 | 4.0 KB | 5.5 KB | 6.8 KB |
+| RISC-V rv32 | 4.1 KB | 5.5 KB | 6.8 KB |
+| Xtensa LX106 (ESP8266) | 4.2 KB | 5.8 KB | 7.3 KB |
+| ARM32 | 4.3 KB | 5.8 KB | 7.7 KB |
+| 68k | 4.4 KB | 6.2 KB | 7.8 KB |
+| MIPS32 | 4.7 KB | 6.6 KB | 8.7 KB |
+| x86-64 (GCC) | 4.6 KB | 6.1 KB | 8.0 KB |
+| AArch64 (ARM64) | 4.8 KB | 6.6 KB | 8.7 KB |
+| x86-32 | 5.3 KB | 7.2 KB | 9.2 KB |
+| PowerPC | 5.8 KB | 8.0 KB | 10.4 KB |
+| MSP430 (16-bit) | 7.8 KB | 10.7 KB | 12.8 KB |
+| AVR (ATmega328P) | 9.2 KB | 12.8 KB | 15.4 KB |
+| 68HC11 | 13.3 KB | 18.4 KB | 22.6 KB |
 <!-- SIZE_TABLE_END -->
 
 ### Lean build options
 
-Three compile-time `#define` guards let you strip optional subsystems
+Compile-time `#define` guards let you strip optional subsystems
 for ROM-constrained targets. Define them before including `FR_math.h`
 (or pass `-D` on the compiler command line):
 
 | Define | What it removes | Typical savings |
 |---|---|---|
-| `FR_CORE_ONLY` | Everything below (print + waves) | ~1.9 KB |
+| `FR_LEAN` | Degree trig, BAM tan, angle converters, `FR_log10`, `FR_hypot`, waves + ADSR | ~3.7 KB |
+| `FR_CORE_ONLY` | Print + waves (shorthand for both below) | ~1.9 KB |
 | `FR_NO_PRINT` | `FR_printNumF`, `FR_printNumD`, `FR_printNumH`, `FR_numstr` | ~1.3 KB |
 | `FR_NO_WAVES` | `fr_wave_*` (6 shapes), `fr_adsr_*` (ADSR envelope), `FR_HZ2BAM_INC` | ~0.6 KB |
 
+`FR_LEAN` keeps only radian trig (sin, cos, tan), inverse trig, sqrt,
+log2, ln, exp, pow2, and arithmetic — comparable to libfixmath's API at
+4.7 KB text. `FR_LEAN` implies `FR_NO_WAVES`.
+
 `FR_CORE_ONLY` is a convenience shorthand that defines both
 `FR_NO_PRINT` and `FR_NO_WAVES` in one step.
 
@@ -211,7 +227,7 @@ To regenerate this table, run the Docker cross-build
 (requires the [xelp](https://github.com/deftio/xelp) Docker image):
 
 ```bash
-scripts/crossbuild-docker.sh
+scripts/crossbuild_sizes.sh
 ```
 
 ### Example: RISC-V
diff --git a/docs/examples.md b/docs/examples.md
index b7d6145..d54c34e 100644
--- a/docs/examples.md
+++ b/docs/examples.md
@@ -1,7 +1,7 @@
 # Examples
 
 Short, runnable snippets for the most common FR_Math tasks. Each
-example compiles cleanly against the v2.0.0 library with:
+example compiles cleanly against the library with:
 
 ```bash
 cc -Isrc example.c src/FR_math.c -o example
@@ -57,15 +57,15 @@ int main(void)
 ## 2. Trig — integer degrees vs radian vs BAM
 
 FR_Math supports three angle conventions and this example hits
-all three: integer degrees through the legacy
-`FR_Sin` / `FR_Cos` API, the radian-native
+all three: fixed-point degrees through the
+`fr_sin_deg` / `fr_cos_deg` API, the radian-native
 `fr_sin` / `fr_cos` (radian at a chosen
 input radix), and BAM-native `fr_sin_bam` /
 `fr_cos_bam`. All three paths feed the same 129-entry
 quadrant cosine table under the hood and should produce nearly
 identical results.
 
-*Caveats:* the `radix` parameter on `FR_Sin(deg, radix)` is
+*Caveats:* the `radix` parameter on `fr_sin_deg(deg, radix)` is
 the radix of the *degree input*, not the output. All sin/cos
 functions return **s15.16** — that is, `s32` at radix 16,
 where 1.0 = 65536 (`FR_TRIG_ONE`). The values compared below
@@ -200,19 +200,16 @@ int main(void)
 ## 5. Arctangent and atan2
 
 The inverse-trig functions in FR_Math return angles in
-**degrees**, not radians — the output fits in
-an `s16` and you can feed it straight back into
-`FR_SinI` / `FR_CosI` without any
-conversion. This example exercises both `FR_atan`
-(single-argument ratio) and `FR_atan2` (full-circle,
-two-argument).
-
-*Caveats:* `FR_atan2` takes only two
-arguments (`y`, `x`) and has no radix
-parameter — it returns degrees in [−180, 180] as
-`s16`. The `radix` argument on
-`FR_atan` is the radix of the *input* ratio,
-not of the output.
+**radians** at a caller-chosen output radix. This
+example exercises both `FR_atan` (single-argument
+ratio) and `FR_atan2` (full-circle, two-argument).
+
+*Caveats:* all inverse-trig functions take an
+`out_radix` parameter that sets the radix of the
+*output*. `FR_atan2(y, x, out_radix)` returns
+radians in [−π, π] as `s32` at the chosen
+radix. `FR_atan(input, radix, out_radix)` has
+separate radixes for input and output.
 
 ```c
 #include <stdio.h>
@@ -222,18 +219,19 @@ int main(void)
 {
     const u16 r = 14;
 
-    /* atan(1) = 45 degrees */
-    s16 a = FR_atan(I2FR(1, r), r);
-    printf("atan(1) = %d degrees (expect 45)\n", a);
+    /* atan(1) = pi/4 radians ≈ 0.7854 */
+    s32 a = FR_atan(I2FR(1, r), r, r);
+    printf("atan(1) = %d (radix %d, expect ~%d)\n",
+           (int)a, r, (int)(12868));  /* pi/4 at r14 */
 
     /* Full-circle atan2 */
-    s16 q2 = FR_atan2(I2FR( 1, r), I2FR(-1, r));  /*  135 deg */
-    s16 q3 = FR_atan2(I2FR(-1, r), I2FR(-1, r));  /* -135 deg */
-    printf("atan2( 1,-1) = %d\n", q2);
-    printf("atan2(-1,-1) = %d\n", q3);
+    s32 q2 = FR_atan2(I2FR( 1, r), I2FR(-1, r), r);  /*  3*pi/4 */
+    s32 q3 = FR_atan2(I2FR(-1, r), I2FR(-1, r), r);  /* -3*pi/4 */
+    printf("atan2( 1,-1) = %d (expect ~%d)\n", (int)q2, (int)(38603));
+    printf("atan2(-1,-1) = %d (expect ~%d)\n", (int)q3, (int)(-38603));
 
     /* asin with out-of-domain input */
-    s16 bad = FR_asin(I2FR(2, r), r);
+    s32 bad = FR_asin(I2FR(2, r), r, r);
     if (bad == FR_DOMAIN_ERROR)
         printf("asin(2) rejected, good.\n");
     return 0;
@@ -425,7 +423,7 @@ The `XFormPtI16` fast path takes `s16`
 coordinates in and writes `s16` out. It's a tiny
 bit lossier than the `s32` form, but it sidesteps all
 the fixed-point conversion on the hot path — useful inside
-the inner loop of a scanline rasteriser where you already know
+the inner loop of a scanline rasterizer where you already know
 your coordinates fit in 16 bits.
 
 *Caveats:* the output is narrowed to `s16`,
@@ -511,7 +509,7 @@ int main(void)
         FR_printNumF(buf_putc, val, 16, 0, 8);
         printf("    16     16  0x%08x  %s\n", (unsigned)val, buf);
         /* Expected: "3.14158630" — good through 5 digits, then
-         * quantisation noise appears.  This is the sweet spot for
+         * quantization noise appears.  This is the sweet spot for
          * most embedded work: 16 bits of fraction fits in an s32
          * with 15 bits of integer range (±32767). */
     }
@@ -562,9 +560,38 @@ at radix 8 the value is `0x324` — only 10 significant bits —
 so the decimal rendering can only faithfully reproduce about two
 fractional digits. At radix 24 the value is `0x03243F6A` — 26
 significant bits — and seven decimal digits survive. The
-eighth digit (`5` vs `4`) shows the quantisation floor: `2^−24 ≈
+eighth digit (`5` vs `4`) shows the quantization floor: `2^−24 ≈
 6 × 10^−8`, so the last digit is always uncertain.
 
+## Desktop example programs
+
+In addition to the inline snippets above, the `examples/` directory
+contains four self-contained desktop programs. Each has its own
+`Makefile` and `README.md`; build artifacts stay within the example's
+directory.
+
+| Directory | What it does |
+|---|---|
+| [`examples/fixed-point-basics/`](../examples/fixed-point-basics/) | Educational walkthrough of radix interpretation, `I2FR`/`FR2I` round-trips, `FR_NUM` constant construction, aligned add/sub, multiply precision, division, saturation, and `FR_printNumF` formatted output. |
+| [`examples/log-exp-curves/`](../examples/log-exp-curves/) | Sweeps `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`, and `FR_sqrt` against IEEE double reference values, printing per-point and summary error tables. |
+| [`examples/waveform-synth/`](../examples/waveform-synth/) | Generates square, triangle, sawtooth, PWM, sine, and noise waveforms plus an ADSR envelope and amplitude-modulated combination. Default mode renders ASCII art; `--csv` mode outputs machine-readable CSV. |
+| [`examples/trig-accuracy/`](../examples/trig-accuracy/) | Head-to-head comparison of FR_Math (`FR_SinI`/`FR_CosI`/`FR_TanI`) vs libfixmath (`fix16_sin`/`fix16_cos`/`fix16_tan`) vs IEEE double over 0–360 degrees. Requires libfixmath source. |
+
+Build all from the repo root:
+
+```bash
+make examples        # builds all desktop examples
+make run-examples    # builds and runs the first three; trig-accuracy only if libfixmath is present
+```
+
+Or build any single example from its directory:
+
+```bash
+cd examples/waveform-synth
+make run             # ASCII art output
+make run-csv         # CSV output
+```
+
 ## See also
 
 - [API Reference](api-reference.md) — full
diff --git a/docs/fixed-point-primer.md b/docs/fixed-point-primer.md
index 4c127ad..489a127 100644
--- a/docs/fixed-point-primer.md
+++ b/docs/fixed-point-primer.md
@@ -266,14 +266,14 @@ you want to think of an FR_Math value as a "number with a
 radix", think of the radix as a *type annotation that lives
 in your source code*, not a runtime field.
 
-## Quantisation and loss of precision
+## Quantization and loss of precision
 
 Fixing the radix also fixes the smallest representable fractional
 step. At radix *N*, that step is `2^−N` — nothing finer survives
 the round-trip into the integer. Any real value smaller than the
 step rounds to zero; any real value landing between two adjacent
 steps rounds to one of them. The difference between the ideal
-value and its stored form is called **quantisation error**, and it
+value and its stored form is called **quantization error**, and it
 is the main price paid for doing fractional math in integer
 registers.
 
@@ -295,7 +295,7 @@ radix 16 and the picture changes:
 error                       =  0.00000153  (< 0.002 %)
 ```
 
-This behaviour isn't a bug — it is the same compromise IEEE-754
+This behavior isn't a bug — it is the same compromise IEEE-754
 floating point makes with its mantissa. The difference is that a
 float hides the trade-off behind a variable exponent, while
 fixed-point puts it on a ledger that the programmer chooses up
@@ -307,7 +307,7 @@ half the smallest step the application cares about. Any coarser
 and small signals vanish; any finer and integer headroom is being
 spent for no benefit.
 
-A second consequence worth recording: quantisation error
+A second consequence worth recording: quantization error
 *accumulates*. Summing a million low-radix values sums the errors
 too. Signal-processing pipelines with long feedback paths are the
 main reason to carry accumulators at a wider radix than the
@@ -375,7 +375,7 @@ FR_Math ships this operation as
 callback `f`, which makes it usable on targets without stdio — a
 UART write, an LCD glyph pusher, a ring-buffer append. The `pad`
 parameter sets a minimum field width and `prec` sets the number of
-fractional digits. Rounding behaviour matches the hand-rolled
+fractional digits. Rounding behavior matches the hand-rolled
 version: excess fractional digits are truncated, and negative
 values are handled without the two's-complement trap described
 above.
@@ -384,7 +384,7 @@ above.
 
 Once you've chosen a radix, the everyday operations behave
 almost like integer math — with one or two twists per
-operation that you just have to internalise. Let's walk
+operation that you just have to internalize. Let's walk
 through them.
 
 ### Addition and subtraction
@@ -527,7 +527,7 @@ Three things to watch for:
   it explicitly before the divide.
 - **Rounding toward zero.** C's integer division truncates toward
   zero for both signs, so `−7 / 2 == −3` (not `−4`). Fixed-point
-  division inherits that behaviour. Round-to-nearest can be
+  division inherits that behavior. Round-to-nearest can be
   layered on top by adding `b / 2` (for a positive numerator) or
   `−b / 2` (for a negative numerator) to the pre-scaled numerator
   before the divide.
@@ -557,7 +557,7 @@ for you:
 - Going to a *smaller* radix — the low bits are
   dropped. Precision is lost; headroom grows. This is a good
   place to add `± (1 << (from_r - to_r - 1))`
-  before the shift if you want round-to-nearest behaviour.
+  before the shift if you want round-to-nearest behavior.
 
 The value is conserved as closely as the destination radix can
 represent it. Nothing more, nothing less.
@@ -620,7 +620,7 @@ and store the result back into a 32-bit register without thinking
 about it, you will eventually pass a pair of inputs whose product
 doesn't fit, and plain C will hand you wrap-around garbage
 with no warning. A signed 32-bit multiply that overflows is not a
-runtime error in C — it's undefined behaviour that
+runtime error in C — it's undefined behavior that
 happens to look like data most of the time.
 
 FR_Math defends against this in three layers, and it's
@@ -711,12 +711,12 @@ you actually need 15 integer bits on that particular signal.
 ## A worked example: one-pole IIR low-pass filter
 
 The sections up to this point have introduced the pieces
-individually: scaling, notation, quantisation, arithmetic,
+individually: scaling, notation, quantization, arithmetic,
 overflow, and radix choice. A small end-to-end example is the
 fastest way to see how those pieces fit together on a real
 pipeline. The filter walked through below is a single-pole
 infinite-impulse-response (IIR) low-pass — about the simplest
-entry in the DSP catalogue, but realistic enough to exercise
+entry in the DSP catalog, but realistic enough to exercise
 nearly every decision the primer has covered so far.
 
 In floating point, the filter is one line of arithmetic:
@@ -753,7 +753,7 @@ be picked:
   `x`, so it shares the same ±32767 output range. But because it
   accumulates small updates on every sample, it will drift and
   lose precision unless carried at a higher radix than the raw
-  input. This is the quantisation-error accumulation noted
+  input. This is the quantization-error accumulation noted
   earlier in the primer, showing up in practice.
 
 ### Step 2: pick the radixes
@@ -841,7 +841,7 @@ feeds both versions a few thousand samples — a mix of sine tones,
 step inputs, and silence is enough to exercise the relevant paths
 — and reports the worst-case delta. For a radix-15 one-pole IIR
 the expected worst-case difference is on the order of a few LSB,
-comparable to the inherent quantisation of the 16-bit output
+comparable to the inherent quantization of the 16-bit output
 format and not audible in normal listening. Anything substantially
 larger indicates a radix choice that is too tight, a rounding
 mode that is drifting, or a missing int64 promotion on the
@@ -862,8 +862,8 @@ generation of each symbol:
 | Prefix | What it is | Example |
 | --- | --- | --- |
 | `FR_XXX()` | `UPPERCASE` macro — inline, zero call overhead. | `FR_ADD`, `FR_ABS`, `FR2I` |
-| `FR_Xxx()` | Mixed-case C function — the classic v1 API. Integer-degree trig and related. | `FR_Sin`, `FR_log2`, `FR_sqrt` |
-| `fr_xxx()` | Lowercase C function — v2 additions (radian / BAM trig, wave generators, ADSR). | `fr_sin`, `fr_wave_tri`, `fr_adsr_step` |
+| `FR_Xxx()` | Mixed-case C function — the classic v1 API. Log, sqrt, inverse trig, and related. | `FR_log2`, `FR_sqrt`, `FR_atan2` |
+| `fr_xxx()` | Lowercase C function — v2 API (degree/radian/BAM trig, wave generators, ADSR). `fr_sin_deg`, `fr_cos_deg`, `fr_tan_deg` are the current degree-based trig names. `FR_Sin`/`FR_Cos`/`FR_Tan` remain as legacy aliases. | `fr_sin_deg`, `fr_sin`, `fr_wave_tri`, `fr_adsr_step` |
 | `s8, s16, s32` | Signed integer typedefs (aliases for `int8_t`, `int16_t`, `int32_t`). | — |
 | `u8, u16, u32` | Unsigned integer typedefs. | — |
 
@@ -917,13 +917,13 @@ Angles deserve their own section because FR_Math gives you
 angle into?** Because the `u16` wraparound *is* the angular
 modulus — that's the whole feature. Adding two `u16` BAM values
 automatically gives you the right answer modulo a full revolution,
-with zero quantisation error at the boundary and no `% 65536` in
+with zero quantization error at the boundary and no `% 65536` in
 sight. If BAM were `s32`, every read of the table would have to
 explicitly mask off the top bits (and handle negative values)
 before the quadrant extraction (`bam >> 14`) made any sense. You
 would have traded one free operation for two slow ones on every
-sample, just to get back the same behaviour. So instead, the public
-trig entry points (`FR_CosI`, `FR_Cos`, `fr_cos`, and friends)
+sample, just to get back the same behavior. So instead, the public
+trig entry points (`FR_CosI`, `fr_cos_deg`, `fr_cos`, and friends)
 *all* take signed angles — in degrees, fixed-radix degrees, or
 radians — and only the internal `fr_cos_bam` / `fr_sin_bam`
 primitives see the `u16`. In practice you will never construct a
diff --git a/docs/getting-started.md b/docs/getting-started.md
index 54b7f17..2fefb6d 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -13,8 +13,8 @@ manager integration and no install step. Either:
 
 - Copy `src/FR_math.c`, `src/FR_math.h`,
   `src/FR_defs.h` (and optionally
-  `src/FR_math_2D.cpp`, `src/FR_math_2D.h`,
-  and `src/FR_trig_table.h`) into the target project, **or**
+  `src/FR_math_2D.cpp`, `src/FR_math_2D.h`)
+  into the target project, **or**
 - Add FR_Math as a git submodule and point the build system at
   `src/`.
 
@@ -29,7 +29,7 @@ cd fr_math
 
 `build.sh` wipes `build/`, rebuilds the
 library, examples, and tests, and runs the full test suite. On success
-the output shows 42 tests passing across six test binaries.
+the output shows all tests passing (99% line coverage).
 
 ## A first program
 
@@ -274,8 +274,9 @@ make test           # build + run every test suite
 make coverage       # coverage report (requires gcov)
 ```
 
-As of v2.0.1, FR_Math ships with 42 passing tests and 99% line
-coverage across the library sources.
+Run `make test` for a full pass. With `make coverage`, line coverage of
+the library sources is about **99%**. See [Building & Testing](building.md)
+for targets, cross-compilation, and CI.
 
 ## Next steps
 
@@ -284,7 +285,7 @@ coverage across the library sources.
   conventions work.
 - **[API Reference](api-reference.md)**
   — per-symbol inputs, outputs, precision, and saturation
-  behaviour.
+  behavior.
 - **[Examples](examples.md)** —
   runnable snippets for common tasks.
 - **[Building & Testing](building.md)**
diff --git a/docs/releases.md b/docs/releases.md
index 277b811..734eb1b 100644
--- a/docs/releases.md
+++ b/docs/releases.md
@@ -4,16 +4,27 @@ Release highlights. For the full per-symbol change log, see
 [release_notes.md](https://github.com/deftio/fr_math/blob/master/release_notes.md)
 in the repo.
 
+## v2.0.8 — 2026
+
+Tangent accuracy rewrite and trig rounding fix.
+
+- **BAM-native tangent**: new `fr_tan_bam(u16 bam)` with 65-entry octant table (130 bytes). No 64-bit math. `FR_TanI`, `FR_Tan`, `fr_tan` are now thin wrappers.
+- **Round-to-nearest fix**: radian/degree trig wrappers now round instead of truncating when converting to BAM. Peak error drops from ~1.03% to 0.16% on the radian path, matching BAM-native accuracy.
+- **Conversion macro trimming**: `FR_DEG2BAM` and `FR_RAD2BAM` reduced to ~18-21 bits (from ~28 bits). Verified: no measurable accuracy impact.
+- **`FR_TRIG_MINVAL` fixed**: now `-FR_TRIG_MAXVAL` (was `-FR_TRIG_MASK`).
+
+---
+
 ## v2.0.7 — 2026
 
 README restructure, accuracy table cleanup, expanded cross-compile support.
 
 - **`FR_CORE_ONLY` convenience define** — single `#define` strips both print helpers and wave generators
 - **Accuracy table cleanup** — removed LSB column (percent error is the user-facing metric)
-- **New cross-compile targets** — RP2040 (Cortex-M0+), STM32 (Cortex-M4), 68HC11 added to Docker build
-- **Two-column size table** — Core (`-DFR_CORE_ONLY`) vs Full for every target
-- **`scripts/update_sizes.sh`** — auto-patches size tables from `build/sizes.csv`
-- README reordered: accuracy table first, then function list, then size table
+- **New cross-compile targets** — RP2040 (Cortex-M0+), STM32 (Cortex-M4), 68HC11, MIPS32 added to Docker build
+- **Three-column size table** — Lean / Core / Full for every target, sorted 8-bit → 64-bit
+- **Consolidated `scripts/crossbuild_sizes.sh`** — single script runs Docker, builds, writes CSV + markdown, patches docs (replaces three separate scripts)
+- README reordered and cleaned up: accuracy table first, badges as standard markdown, concise build flavor descriptions
 
 ---
 
@@ -124,7 +135,7 @@ with v2.0.0 except where noted.
 - `FR_DIV(x, xr, y, yr)` — fixed-point division with 64-bit
   pre-scaling. Now **rounds to nearest** (≤ 0.5 LSB error)
   instead of truncating. `FR_DIV_TRUNC` preserves the old
-  truncating behaviour for backward compatibility. `FR_DIV32` is
+  truncating behavior for backward compatibility. `FR_DIV32` is
   the 32-bit-only truncating path.
 - `FR_MOD(x, xr, y, yr)` — fixed-point modulus.
 
@@ -149,7 +160,7 @@ with v2.0.0 except where noted.
 | FR_atan signature | `(input, radix)` → s16 degrees | `(input, radix, out_radix)` → s32 radians |
 | FR_atan2 signature | `(y, x)` → s16 degrees | `(y, x, out_radix)` → s32 radians |
 | FR_BAM2RAD | off by 1024× (bug) | correct |
-| FR_DIV rounding | truncates toward zero | rounds to nearest (use `FR_DIV_TRUNC` for old behaviour) |
+| FR_DIV rounding | truncates toward zero | rounds to nearest (use `FR_DIV_TRUNC` for old behavior) |
 
 ---
 
@@ -187,7 +198,7 @@ mandatory.
   dropped.
 - `FR_atan`, `FR_Tan`, `FR_TanI`:
   wiring and overflow fixes.
-- `FR_printNumD/F/H`: fixed undefined behaviour on
+- `FR_printNumD/F/H`: fixed undefined behavior on
   `INT_MIN` and a broken fraction extraction in the
   v1 code.
 - `FR_DEG2RAD` / `FR_RAD2DEG`: macro bodies
@@ -205,7 +216,7 @@ mandatory.
   `FR_BAM2DEG`, `FR_RAD2BAM`,
   `FR_BAM2RAD`. BAM (16 bits per full circle) is the
   natural integer representation for phase accumulators and
-  gives zero quantisation at the wraparound.
+  gives zero quantization at the wraparound.
 - **Square root and hypot**: `FR_sqrt`
   uses a digit-by-digit integer isqrt on `int64_t`;
   `FR_hypot` computes `sqrt(x² + y²)`
@@ -259,10 +270,9 @@ mandatory.
 
 ### Test suite
 
-v2 ships with **42 tests** across six test binaries
-and a characterisation suite (`test_tdd.cpp`) that pins
-numerical behaviour to bit-exact reference values. Overall line
-coverage is **99%** on the library sources.
+v2 ships with a full test suite covering **99%** of library
+source lines, plus a characterization suite (`test_tdd.cpp`)
+that pins numerical behavior to bit-exact reference values.
 
 ## v1.0.3 — 2025
 
@@ -298,6 +308,5 @@ FR_Math has been in continuous service since **2000**,
 when it was written to run 2D graphics transforms on 16 MHz 68k
 Palm Pilots for Trumpetsoft's *Inkstorm*. It has since
 been ported to ARM, x86, MIPS, RISC-V, and a menagerie of 8- and
-16-bit embedded targets. v2.0.7 is the current release with a
-full test suite, a bit-exact numerical specification, and CI on
-every push.
+16-bit embedded targets. The current release has a full test
+suite, a bit-exact numerical specification, and CI on every push.
diff --git a/examples/README.md b/examples/README.md
index c06fc5c..09f2e7f 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -1,5 +1,8 @@
 # FR_Math Examples
 
+This directory contains examples for several platforms that show how FR_Math works. On embedded platforms, the built-in print helpers (`FR_printNumD`, `FR_printNumF`, `FR_printNumH`) can produce readable output in various radix formats.
+
+
 ## Arduino Examples
 
 Each example is a self-contained `.ino` sketch that prints results to
@@ -13,17 +16,31 @@ or PlatformIO, then open any example from **File > Examples > FR_Math**.
 | [wave-generators](wave-generators/wave-generators.ino) | sqr, tri, saw, PWM, noise, ADSR envelope |
 | [arduino_smoke](arduino_smoke/arduino_smoke.ino) | Compile-only smoke test — exercises every function group |
 
-## Desktop / POSIX Example
+## Desktop / POSIX Examples
+
+Each desktop example is self-contained with its own `Makefile` and `README.md`.
+Build artifacts stay within the example's directory.
 
 | Example | What it shows |
 |---|---|
-| [posix-example](posix-example/FR_Math_Example1.cpp) | Comprehensive demo of all library features including 2D transforms (requires `<stdio.h>`, `<math.h>`) |
+| [posix-example](posix-example/FR_Math_Example1.cpp) | Comprehensive demo of all library features including 2D transforms |
+| [fixed-point-basics](fixed-point-basics/) | Educational walkthrough: radix, conversions, add/sub/mul/div, saturation, formatted output |
+| [log-exp-curves](log-exp-curves/) | Sweep log2/ln/log10, pow2/exp/pow10, sqrt with error tables vs IEEE double |
+| [waveform-synth](waveform-synth/) | Wave generators + ADSR envelope with ASCII art and CSV output modes |
+| [trig-accuracy](trig-accuracy/) | FR_Math vs libfixmath trig accuracy comparison (requires libfixmath source) |
+
+Build all from the repo root:
+
+```bash
+make examples        # builds all desktop examples
+make run-examples    # builds and runs fixed-point-basics, log-exp-curves, waveform-synth; trig-accuracy only if libfixmath is present
+```
 
-Build the POSIX example with:
+Or build any single example from its directory:
 
 ```bash
-make examples        # produces build/fr_example
-./build/fr_example
+cd examples/fixed-point-basics
+make run
 ```
 
 ## Using FR_Math in Arduino
diff --git a/examples/arduino_smoke/README.md b/examples/arduino_smoke/README.md
new file mode 100644
index 0000000..304683e
--- /dev/null
+++ b/examples/arduino_smoke/README.md
@@ -0,0 +1,36 @@
+# Arduino Smoke Test
+
+Compile-only smoke test that exercises every major FR_Math function group
+to verify that `FR_math.c` links cleanly on AVR (avr-gcc) and ARM targets.
+No hardware is required — the sketch only needs to compile for the selected board.
+
+## What it tests
+
+- Conversions and arithmetic: `I2FR`, `FR_ADD`, `FR_DIV`, `FR_DIV_TRUNC`, `FR_MOD`
+- Integer-degree trig: `FR_CosI`, `FR_SinI`
+- BAM and radian trig: `fr_cos_bam`, `fr_sin_bam`, `fr_cos`, `fr_sin`
+- Inverse trig: `FR_atan2`, `FR_acos`
+- Log / exp: `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`
+- Shift-only variants: `FR_EXP_FAST`, `FR_POW10_FAST`
+- Roots: `FR_sqrt`, `FR_hypot`, `FR_hypot_fast`, `FR_hypot_fast8`
+- Wave generators: `fr_wave_sqr`, `fr_wave_pwm`, `fr_wave_tri`, `fr_wave_saw`, `fr_wave_tri_morph`, `fr_wave_noise`
+- ADSR envelope: `fr_adsr_init`, `fr_adsr_trigger`, `fr_adsr_step`, `fr_adsr_release`
+- String parsing: `FR_numstr`
+
+## Building
+
+**Arduino CLI** (no upload needed — compile-only test):
+
+```bash
+arduino-cli compile --fqbn arduino:avr:uno examples/arduino_smoke
+```
+
+If it compiles without errors, all function groups link correctly.
+
+## Expected serial output
+
+If uploaded and run:
+
+```
+FR_Math smoke test: all functions linked OK
+```
diff --git a/examples/basic-math/README.md b/examples/basic-math/README.md
new file mode 100644
index 0000000..5d1cc20
--- /dev/null
+++ b/examples/basic-math/README.md
@@ -0,0 +1,63 @@
+# Basic Math — Arduino Example
+
+Demonstrates fixed-point arithmetic fundamentals on Arduino:
+conversions, add, subtract, multiply, divide, and utility macros.
+
+## What it demonstrates
+
+| Operation | Functions / macros |
+|-----------|-------------------|
+| Integer to fixed-point | `I2FR`, `FR_INT` |
+| Addition / subtraction | `FR_ADD`, `FR_SUB` |
+| Multiplication | `FR_FixMuls` (round-to-nearest) |
+| Division | `FR_DIV` (64-bit, rounded) |
+| Constant construction | `FR_NUM` (build 3.14159 from parts) |
+| String parsing | `FR_numstr` |
+| Utility macros | `FR_ABS`, `FR_MIN`, `FR_MAX`, `FR_CLAMP` |
+| Radix change | `FR_CHRDX` |
+
+## Hardware
+
+Any Arduino board with a serial port. Output at 9600 baud.
+
+## Building
+
+**Arduino IDE**: Open `basic-math.ino` from **File > Examples > FR_Math > basic-math**.
+
+**Arduino CLI**:
+
+```bash
+arduino-cli compile --fqbn arduino:avr:uno examples/basic-math
+arduino-cli upload  --fqbn arduino:avr:uno -p /dev/ttyACM0 examples/basic-math
+arduino-cli monitor -p /dev/ttyACM0 --config baudrate=9600
+```
+
+**PlatformIO**:
+
+```bash
+pio run -e uno
+pio run -e uno -t upload
+pio device monitor -b 9600
+```
+
+## Expected serial output
+
+```
+=== FR_Math Basic Arithmetic ===
+
+a = 100
+b = 37
+a + b = 137
+a - b = 63
+a * b = 3700
+a / b = 2
+pi  ~ 3
+parsed "-12.75" = -12
+abs(-5)    = 5
+min(3,7)   = 3
+max(3,7)   = 7
+clamp(9,0,5) = 5
+42 @ radix 12 -> radix 8: 42
+
+Done.
+```
diff --git a/examples/fixed-point-basics/Makefile b/examples/fixed-point-basics/Makefile
new file mode 100644
index 0000000..e0eaf83
--- /dev/null
+++ b/examples/fixed-point-basics/Makefile
@@ -0,0 +1,29 @@
+# fixed-point-basics — self-contained build
+# All artifacts stay in this directory.
+#
+# Usage:
+#   make          Build the example
+#   make run      Build and run
+#   make clean    Remove build artifacts
+
+CC  ?= gcc
+CXX ?= g++
+SRC_DIR = ../../src
+
+CXXFLAGS = -I$(SRC_DIR) -Wall -Wextra -Wshadow -Os
+LDFLAGS  = -lm
+
+TARGET = fixed_point_basics
+
+.PHONY: all run clean
+
+all: $(TARGET)
+
+$(TARGET): fixed_point_basics.cpp $(SRC_DIR)/FR_math.c
+	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
+
+run: $(TARGET)
+	./$(TARGET)
+
+clean:
+	rm -f $(TARGET) *.o *.gcda *.gcno
diff --git a/examples/fixed-point-basics/README.md b/examples/fixed-point-basics/README.md
new file mode 100644
index 0000000..eedd81a
--- /dev/null
+++ b/examples/fixed-point-basics/README.md
@@ -0,0 +1,63 @@
+# Fixed-Point Basics
+
+An educational walkthrough of fixed-point fundamentals using the FR_Math
+library. This example is self-contained and builds independently.
+
+## What it demonstrates
+
+| Section | Topic | Key functions / macros |
+|---------|-------|----------------------|
+| A | Same integer interpreted at radixes 0-15 | `FR2D` |
+| B | Integer-to-fixed and back round-trip | `I2FR`, `FR2I` |
+| C | Constructing fractional constants (pi) | `FR_NUM` |
+| D | Add / subtract with radix alignment | `FR_ADD`, `FR_CHRDX` |
+| E | Multiply: precision doubling and truncation | `FR_CHRDX` |
+| F | Division (64-bit and 32-bit variants) | `FR_DIV`, `FR_DIV32` |
+| G | Saturation vs overflow | `FR_FixAddSat`, `FR_FixMulSat` |
+| H | Formatted output via putchar callback | `FR_printNumF` |
+
+## Building
+
+```bash
+make            # compiles fixed_point_basics
+make run        # compiles and runs
+make clean      # removes build artifacts
+```
+
+Or compile manually:
+
+```bash
+g++ -I../../src -Wall -Os fixed_point_basics.cpp ../../src/FR_math.c -lm -o fixed_point_basics
+```
+
+## Expected output
+
+The program prints a series of labeled sections. Section A shows how
+the same raw integer (1234) maps to different floating-point values as
+the radix changes. Subsequent sections print tables comparing FR_Math
+operations against expected values, demonstrating precision, alignment
+rules, and saturation behavior.
+
+```
+FR_Math — Fixed-Point Basics  (v...)
+
+========================================
+  A. Same integer at radixes 0-15
+========================================
+
+  Raw integer value: 1234
+  ...
+
+========================================
+  H. FR_printNumF — formatted fixed-point printing
+========================================
+  ...
+
+--- end ---
+```
+
+## Dependencies
+
+- A C++ compiler (g++ or clang++)
+- FR_Math source (`../../src/FR_math.c`, `../../src/FR_math.h`, `../../src/FR_defs.h`)
+- Standard C math library (`-lm`, used only for `FR2D` debug macro)
diff --git a/examples/fixed-point-basics/fixed_point_basics b/examples/fixed-point-basics/fixed_point_basics
new file mode 100755
index 0000000..87578da
Binary files /dev/null and b/examples/fixed-point-basics/fixed_point_basics differ
diff --git a/examples/fixed-point-basics/fixed_point_basics.cpp b/examples/fixed-point-basics/fixed_point_basics.cpp
new file mode 100644
index 0000000..2499ed6
--- /dev/null
+++ b/examples/fixed-point-basics/fixed_point_basics.cpp
@@ -0,0 +1,236 @@
+/**
+ * fixed_point_basics.cpp — Educational walkthrough of fixed-point fundamentals
+ *
+ * Demonstrates: radix interpretation, I2FR/FR2I conversions, FR_NUM construction,
+ * add/sub alignment, multiply precision, division, saturation, and FR_printNumF.
+ *
+ * Build:  make ex_basics            (or: `make` from this directory)
+ * Run:    ./build/ex_basics         (or: `make run` from this directory)
+ *
+ * Copyright (C) 2001-2026 M. A. Chatterjee — zlib license (see FR_math.h)
+ */
+
+#include <stdio.h>
+#include <math.h>
+
+#include "FR_defs.h"
+#include "FR_math.h"
+
+/* Output sink passed to FR_printNumF below: writes one character via putchar. */
+static int put_char(char ch) { return putchar(ch); }
+
+/* Print a section banner: the label framed by two rules and blank lines. */
+static void section(const char *label)
+{
+    fputs("\n========================================\n", stdout);
+    printf("  %s\n", label);
+    fputs("========================================\n\n", stdout);
+}
+
+/* ================================================================== */
+int main()
+{   /* Sections A-H run in order; each prints its own heading and results. */
+    printf("FR_Math — Fixed-Point Basics  (v%s)\n", FR_MATH_VERSION);
+
+    /* -------------------------------------------------------------- */
+    /* A. Same integer at different radixes                            */
+    /* -------------------------------------------------------------- */
+    section("A. Same integer at radixes 0-15");
+
+    s32 val = 1234;
+    printf("  Raw integer value: %d\n\n", (int)val);
+    printf("  radix  float-equiv    precision (1/2^r)\n");
+    printf("  -----  -----------    -------------------\n");
+    for (int r = 0; r <= 15; r++) {
+        printf("  %5d  %11.5f    1/%-5d = %.8f\n",
+               r, FR2D(val, r), 1 << r, FR2D(1, r));
+    }
+
+    /* -------------------------------------------------------------- */
+    /* B. I2FR / FR2I round-trip conversions                          */
+    /* -------------------------------------------------------------- */
+    section("B. I2FR / FR2I round-trip conversions");
+
+    int test_ints[] = {0, 1, -1, 42, -100, 32767};
+    int n_tests = (int)(sizeof(test_ints) / sizeof(test_ints[0]));
+    int rdx = 12;
+    printf("  Radix = %d\n\n", rdx);
+    printf("  int   ->  I2FR(int,%d)  ->  FR2I(fr,%d)  ->  float-equiv\n", rdx, rdx);
+    printf("  ----      -----------       -----------       -----------\n");
+    for (int i = 0; i < n_tests; i++) {
+        s32 fr = I2FR(test_ints[i], rdx);
+        s32 back = FR2I(fr, rdx);
+        printf("  %6d    %11d       %11d       %11.4f\n",
+               test_ints[i], (int)fr, (int)back, FR2D(fr, rdx));
+    }
+
+    /* -------------------------------------------------------------- */
+    /* C. FR_NUM — construct pi (3.14159) at radix 12                 */
+    /* -------------------------------------------------------------- */
+    section("C. FR_NUM — construct fractional constants");
+
+    rdx = 12;
+    s32 pi_fr = FR_NUM(3, 14159, 5, rdx);
+    s32 neg_half = FR_NUM(0, 5, 1, rdx);
+    neg_half = -neg_half; /* -0.5 */
+    printf("  pi at radix %d:   raw = %d,  float = %.6f  (ref %.6f)\n",
+           rdx, (int)pi_fr, FR2D(pi_fr, rdx), 3.14159);
+    printf("  -0.5 at radix %d: raw = %d,  float = %.6f\n",
+           rdx, (int)neg_half, FR2D(neg_half, rdx));
+
+    rdx = 16;
+    pi_fr = FR_NUM(3, 14159, 5, rdx);
+    printf("  pi at radix %d:  raw = %d,  float = %.6f  (ref %.6f)\n",
+           rdx, (int)pi_fr, FR2D(pi_fr, rdx), 3.14159);
+
+    /* -------------------------------------------------------------- */
+    /* D. Add/sub with aligned radix                                  */
+    /* -------------------------------------------------------------- */
+    section("D. Addition / subtraction with radix alignment");
+
+    {
+        int ra = 10, rb = 10;
+        s32 a = FR_NUM(2, 5, 1, ra);   /* 2.5 at radix 10 */
+        s32 b = FR_NUM(1, 25, 2, rb);  /* 1.25 at radix 10 */
+        s32 sum = a + b;               /* same radix — direct add */
+        printf("  Same radix (%d):\n", ra);
+        printf("    2.5 + 1.25 = %.4f  (raw: %d + %d = %d)\n",
+               FR2D(sum, ra), (int)a, (int)b, (int)sum);
+
+        /* Different radixes — must align first */
+        int ra2 = 8, rb2 = 12;
+        s32 a2 = FR_NUM(2, 5, 1, ra2);   /* 2.5 at radix 8 */
+        s32 b2 = FR_NUM(1, 25, 2, rb2);  /* 1.25 at radix 12 */
+
+        /* Wrong: adding directly without alignment */
+        s32 wrong = a2 + b2;
+        printf("\n  Different radixes (a=r%d, b=r%d):\n", ra2, rb2);
+        printf("    WRONG (no align):   raw %d + %d = %d  (%.4f ?""?)\n",
+               (int)a2, (int)b2, (int)wrong, FR2D(wrong, ra2));
+
+        /* Right: use FR_ADD which aligns b to a's radix */
+        s32 a2_copy = a2;
+        FR_ADD(a2_copy, ra2, b2, rb2);
+        printf("    RIGHT (FR_ADD):     result raw = %d  (%.4f)\n",
+               (int)a2_copy, FR2D(a2_copy, ra2));
+    }
+
+    /* -------------------------------------------------------------- */
+    /* E. Multiply — precision doubling and truncation                */
+    /* -------------------------------------------------------------- */
+    section("E. Multiply — precision doubling, FR_CHRDX");
+
+    {
+        int r = 12;
+        s32 a = FR_NUM(3, 5, 1, r);    /* 3.5  at radix 12 */
+        s32 b = FR_NUM(2, 25, 2, r);   /* 2.25 at radix 12 */
+        s32 product = a * b;            /* result is at radix 24 (12+12) */
+        printf("  3.5 * 2.25 at radix %d:\n", r);
+        printf("    a = %d (%.4f), b = %d (%.4f)\n",
+               (int)a, FR2D(a, r), (int)b, FR2D(b, r));
+        printf("    product raw = %d  at radix %d  (%.6f)\n",
+               (int)product, 2 * r, FR2D(product, 2 * r));
+        printf("    ref = %.6f\n", 3.5 * 2.25);
+
+        /* Truncate back to original radix */
+        s32 truncated = FR_CHRDX(product, 2 * r, r);
+        printf("    FR_CHRDX to radix %d: %d (%.4f)\n",
+               r, (int)truncated, FR2D(truncated, r));
+    }
+
+    /* -------------------------------------------------------------- */
+    /* F. Division                                                    */
+    /* -------------------------------------------------------------- */
+    section("F. Division (FR_DIV, FR_DIV32)");
+
+    {
+        int r = 16;
+        s32 a = I2FR(7, r);
+        s32 b = I2FR(3, r);
+        s32 q64 = FR_DIV(a, r, b, r);    /* 64-bit intermediate, rounded */
+        printf("  7 / 3 at radix %d:\n", r);
+        printf("    FR_DIV  (64-bit, rounded):   %d  (%.6f)\n",
+               (int)q64, FR2D(q64, r));
+        printf("    ref = %.6f\n", 7.0 / 3.0);
+
+        /* FR_DIV32 works when x << yr fits in s32.
+         * At radix 8, x=7 → x<<8 = 1792, well within range. */
+        int r8 = 8;
+        a = I2FR(7, r8);
+        b = I2FR(3, r8);
+        s32 q32 = FR_DIV32(a, r8, b, r8);
+        printf("\n  7 / 3 at radix %d (FR_DIV32, 32-bit only):\n", r8);
+        printf("    FR_DIV32: %d  (%.6f)   ref = %.6f\n",
+               (int)q32, FR2D(q32, r8), 7.0 / 3.0);
+
+        a = I2FR(22, r);
+        b = I2FR(7, r);
+        q64 = FR_DIV(a, r, b, r);
+        printf("\n  22 / 7 at radix %d:\n", r);
+        printf("    FR_DIV:  %d  (%.6f)   ref = %.6f\n",
+               (int)q64, FR2D(q64, r), 22.0 / 7.0);
+    }
+
+    /* -------------------------------------------------------------- */
+    /* G. Saturation — overflow vs saturate                           */
+    /* -------------------------------------------------------------- */
+    section("G. Saturation (FR_FixAddSat, FR_FixMulSat)");
+
+    {
+        s32 big = 0x70000000;
+        s32 also_big = 0x20000000;
+        /* Wrapped result via unsigned math: signed overflow itself is undefined. */
+        s32 raw_add = (s32)((unsigned)big + (unsigned)also_big);
+        s32 sat_add = FR_FixAddSat(big, also_big);
+        printf("  Addition overflow:\n");
+        printf("    0x%08X + 0x%08X\n", (unsigned)big, (unsigned)also_big);
+        printf("    raw add:       0x%08X  (%d) — OVERFLOW!\n",
+               (unsigned)raw_add, (int)raw_add);
+        printf("    FR_FixAddSat:  0x%08X  (%d) — saturated\n",
+               (unsigned)sat_add, (int)sat_add);
+
+        s32 x = 50000;
+        s32 y = 50000;
+        s32 raw_mul = (s32)((unsigned)x * (unsigned)y); /* wraps mod 2^32, no UB */
+        s32 sat_mul = FR_FixMulSat(x, y);
+
+        printf("\n  Multiply overflow (as s15.16):\n");
+        printf("    %d * %d\n", (int)x, (int)y);
+        printf("    raw mul:       %d — OVERFLOW!\n", (int)raw_mul);
+        printf("    FR_FixMulSat:  %d  (%.4f as s15.16) — saturated\n",
+               (int)sat_mul, FR2D(sat_mul, 16));
+    }
+
+    /* -------------------------------------------------------------- */
+    /* H. FR_printNumF formatted output                               */
+    /* -------------------------------------------------------------- */
+    section("H. FR_printNumF — formatted fixed-point printing");
+
+    {
+        int r = 13;
+        s32 z  = (s32)(123.456 * (1 << r));
+        s32 zn = -z;
+
+        printf("  z  = %d (raw), float = %.4f\n", (int)z, FR2D(z, r));
+        printf("  zn = %d (raw), float = %.4f\n\n", (int)zn, FR2D(zn, r));
+
+        printf("  FR_printNumF(z,  r=%d, pad=6, prec=3): ", r);
+        FR_printNumF(put_char, z, r, 6, 3);
+        printf("\n");
+
+        printf("  FR_printNumF(zn, r=%d, pad=6, prec=3): ", r);
+        FR_printNumF(put_char, zn, r, 6, 3);
+        printf("\n");
+
+        printf("  FR_printNumF(z,  r=%d, pad=8, prec=5): ", r);
+        FR_printNumF(put_char, z, r, 8, 5);
+        printf("\n");
+
+        printf("  FR_printNumF(zn, r=%d, pad=8, prec=5): ", r);
+        FR_printNumF(put_char, zn, r, 8, 5);
+        printf("\n");
+    }
+
+    printf("\n--- end ---\n");
+    return 0;
+}
diff --git a/examples/log-exp-curves/Makefile b/examples/log-exp-curves/Makefile
new file mode 100644
index 0000000..6279a60
--- /dev/null
+++ b/examples/log-exp-curves/Makefile
@@ -0,0 +1,29 @@
+# log-exp-curves — self-contained build
+# All artifacts stay in this directory.
+#
+# Usage:
+#   make          Build ./log_exp_curves (default goal `all`)
+#   make run      Build if out of date, then execute ./log_exp_curves
+#   make clean    Remove the binary and any *.o / *.gcda / *.gcno files here
+
+CC  ?= gcc
+CXX ?= g++
+SRC_DIR = ../../src
+# Note: no rule below references $(CC); both sources are compiled with $(CXX).
+CXXFLAGS = -I$(SRC_DIR) -Wall -Wextra -Wshadow -Os
+LDFLAGS  = -lm
+
+TARGET = log_exp_curves
+
+.PHONY: all run clean
+
+all: $(TARGET)
+# Compile and link in a single $(CXX) invocation ($^ = both prerequisites).
+$(TARGET): log_exp_curves.cpp $(SRC_DIR)/FR_math.c
+	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
+
+run: $(TARGET)
+	./$(TARGET)
+
+clean:
+	rm -f $(TARGET) *.o *.gcda *.gcno
diff --git a/examples/log-exp-curves/README.md b/examples/log-exp-curves/README.md
new file mode 100644
index 0000000..133cd7c
--- /dev/null
+++ b/examples/log-exp-curves/README.md
@@ -0,0 +1,63 @@
+# Log / Exp / Sqrt Curves
+
+Sweeps the FR_Math logarithmic, exponential, and square-root functions,
+printing comparison tables against IEEE 754 double-precision reference
+values with per-point and summary error statistics.
+
+## What it demonstrates
+
+| Table | Functions tested | Input range |
+|-------|-----------------|-------------|
+| Log | `FR_log2`, `FR_ln`, `FR_log10` | 0.25 to 10.0 (9 selected points) |
+| Exp | `FR_pow2`, `FR_EXP`, `FR_POW10` | -3.0 to 3.0 in 0.5 steps |
+| Sqrt | `FR_sqrt` | 0.25 to 100.0 (15 selected points) |
+
+Each table shows: `input | FR_result | reference | error%`
+
+A summary line per function gives max |error%| and avg |error%|.
+
+## Building
+
+```bash
+make            # compiles log_exp_curves
+make run        # compiles and runs
+make clean      # removes build artifacts
+```
+
+Or compile manually:
+
+```bash
+g++ -I../../src -Wall -Os log_exp_curves.cpp ../../src/FR_math.c -lm -o log_exp_curves
+```
+
+## Expected output
+
+```
+FR_Math — Log / Exp / Sqrt Curves  (v..., radix=16)
+
+========================================
+  Log functions (input > 0)
+========================================
+
+  input    | FR_log2      ref_log2     err%     | ...
+  ...
+  FR_log2   max |err|: ...   avg |err|: ...
+
+========================================
+  Exp functions (input -3.0 to 3.0)
+========================================
+  ...
+
+========================================
+  Square root (FR_sqrt)
+========================================
+  ...
+
+--- end ---
+```
+
+## Dependencies
+
+- A C++ compiler (g++ or clang++)
+- FR_Math source (`../../src/FR_math.c`, `../../src/FR_math.h`, `../../src/FR_defs.h`)
+- Standard C math library (`-lm`, for double-precision reference values)
diff --git a/examples/log-exp-curves/log_exp_curves b/examples/log-exp-curves/log_exp_curves
new file mode 100755
index 0000000..92619fb
Binary files /dev/null and b/examples/log-exp-curves/log_exp_curves differ
diff --git a/examples/log-exp-curves/log_exp_curves.cpp b/examples/log-exp-curves/log_exp_curves.cpp
new file mode 100644
index 0000000..1689b8e
--- /dev/null
+++ b/examples/log-exp-curves/log_exp_curves.cpp
@@ -0,0 +1,192 @@
+/**
+ * log_exp_curves.cpp — Sweep log/exp/sqrt functions with comparison tables
+ *
+ * For each function group, prints a table of FR_math result vs IEEE double
+ * reference with error%, then a summary line (max_err%, avg_err%).
+ *
+ * Build:  make ex_logexp            (or: `make` from this directory)
+ * Run:    ./build/ex_logexp         (or: `make run` from this directory)
+ *
+ * Copyright (C) 2001-2026 M. A. Chatterjee — zlib license (see FR_math.h)
+ */
+
+#include <stdio.h>
+#include <math.h>
+
+#include "FR_defs.h"
+#include "FR_math.h"
+
+#define R 16  /* working radix for all tests */
+
+static double pct_err(double measured, double ref)
+{   /* Signed % error of measured vs ref; |measured| * 100 when ref is ~0. */
+    const double delta = measured - ref;
+    return (fabs(ref) < 1e-12) ? fabs(measured) * 100.0
+                               : (delta / ref) * 100.0;
+}
+
+/* Accumulator for max/avg error tracking (updated by err_add, read by err_print) */
+typedef struct {
+    double max_abs_pct;   /* largest |error%| recorded so far */
+    double sum_abs_pct;   /* running total of |error%|; divided by n for the average */
+    int    n;             /* number of samples recorded */
+} err_stats_t;
+
+static void err_reset(err_stats_t *e) { e->n = 0; e->sum_abs_pct = e->max_abs_pct = 0.0; }  /* empty accumulator */
+
+static void err_add(err_stats_t *e, double pct)
+{   /* Fold one signed error% sample into the running max / sum / count. */
+    const double mag = fabs(pct);
+    e->n++;
+    e->sum_abs_pct += mag;
+    if (e->max_abs_pct < mag) e->max_abs_pct = mag;
+}
+
+static void err_print(err_stats_t *e, const char *label)
+{   /* One summary line: label, max and mean |error%|, sample count. */
+    const double mean = (e->n > 0) ? e->sum_abs_pct / e->n : 0.0;
+    printf("  %s  max |err|: %.4f%%   avg |err|: %.4f%%   (%d points)\n",
+           label, e->max_abs_pct, mean, e->n);
+}
+
+/* Print a section banner: the label framed by two rules and blank lines. */
+static void section(const char *label)
+{
+    fputs("\n========================================\n", stdout);
+    printf("  %s\n", label);
+    fputs("========================================\n\n", stdout);
+}
+
+/* ================================================================== */
+int main()
+{   /* Three sweeps (log, exp, sqrt): each prints a table, then max/avg error. */
+    printf("FR_Math — Log / Exp / Sqrt Curves  (v%s, radix=%d)\n", FR_MATH_VERSION, R);
+
+    /* -------------------------------------------------------------- */
+    /* Log functions: FR_log2, FR_ln, FR_log10                        */
+    /* -------------------------------------------------------------- */
+    section("Log functions (input > 0)");
+
+    double log_inputs[] = {0.25, 0.5, 1.0, 1.5, 2.0, 3.0, 5.0, 7.0, 10.0};
+    int n_log = (int)(sizeof(log_inputs) / sizeof(log_inputs[0]));
+
+    err_stats_t e_log2, e_ln, e_log10;
+    err_reset(&e_log2); err_reset(&e_ln); err_reset(&e_log10);
+
+    printf("  %-8s | %-12s %-12s %-8s | %-12s %-12s %-8s | %-12s %-12s %-8s\n",
+           "input", "FR_log2", "ref_log2", "err%",
+           "FR_ln", "ref_ln", "err%",
+           "FR_log10", "ref_log10", "err%");
+    printf("  %-8s-+-%-12s-%-12s-%-8s-+-%-12s-%-12s-%-8s-+-%-12s-%-12s-%-8s\n",
+           "--------", "------------", "------------", "--------",
+           "------------", "------------", "--------",
+           "------------", "------------", "--------");
+
+    for (int i = 0; i < n_log; i++) {
+        double x = log_inputs[i];
+        s32 xfr = (s32)(x * (1 << R));
+
+        double fr_l2  = FR2D(FR_log2(xfr, R, R), R);
+        double fr_ln  = FR2D(FR_ln(xfr, R, R), R);
+        double fr_l10 = FR2D(FR_log10(xfr, R, R), R);
+
+        double ref_l2  = log2(x);
+        double ref_ln  = log(x);
+        double ref_l10 = log10(x);
+
+        double e2 = pct_err(fr_l2, ref_l2);
+        double en = pct_err(fr_ln, ref_ln);
+        double e10 = pct_err(fr_l10, ref_l10);
+
+        err_add(&e_log2, e2);
+        err_add(&e_ln, en);
+        err_add(&e_log10, e10);
+
+        printf("  %-8.4f | %12.6f %12.6f %7.3f%% | %12.6f %12.6f %7.3f%% | %12.6f %12.6f %7.3f%%\n",
+               x, fr_l2, ref_l2, e2, fr_ln, ref_ln, en, fr_l10, ref_l10, e10);
+    }
+
+    printf("\n");
+    err_print(&e_log2,  "FR_log2 ");
+    err_print(&e_ln,    "FR_ln   ");
+    err_print(&e_log10, "FR_log10");
+
+    /* -------------------------------------------------------------- */
+    /* Exp functions: FR_pow2, FR_EXP, FR_POW10                       */
+    /* -------------------------------------------------------------- */
+    section("Exp functions (input -3.0 to 3.0)");
+
+    err_stats_t e_pow2, e_exp, e_pow10;
+    err_reset(&e_pow2); err_reset(&e_exp); err_reset(&e_pow10);
+
+    printf("  %-8s | %-12s %-12s %-8s | %-12s %-12s %-8s | %-12s %-12s %-8s\n",
+           "input", "FR_pow2", "ref_pow2", "err%",
+           "FR_EXP", "ref_exp", "err%",
+           "FR_POW10", "ref_pow10", "err%");
+    printf("  %-8s-+-%-12s-%-12s-%-8s-+-%-12s-%-12s-%-8s-+-%-12s-%-12s-%-8s\n",
+           "--------", "------------", "------------", "--------",
+           "------------", "------------", "--------",
+           "------------", "------------", "--------");
+
+    for (int half = -6; half <= 6; half++) {   /* integer counter: 13 exact 0.5 steps */
+        const double x = 0.5 * half;           /* -3.0 .. +3.0 */
+        s32 xfr = (s32)(x * (1 << R));
+        double fr_p2  = FR2D(FR_pow2(xfr, R), R);
+        double fr_ex  = FR2D(FR_EXP(xfr, R), R);
+        double fr_p10 = FR2D(FR_POW10(xfr, R), R);
+
+        double ref_p2  = pow(2.0, x);
+        double ref_ex  = exp(x);
+        double ref_p10 = pow(10.0, x);
+
+        double ep2 = pct_err(fr_p2, ref_p2);
+        double eex = pct_err(fr_ex, ref_ex);
+        double ep10 = pct_err(fr_p10, ref_p10);
+
+        err_add(&e_pow2, ep2);
+        err_add(&e_exp, eex);
+        err_add(&e_pow10, ep10);
+
+        printf("  %-8.2f | %12.6f %12.6f %7.3f%% | %12.6f %12.6f %7.3f%% | %12.6f %12.6f %7.3f%%\n",
+               x, fr_p2, ref_p2, ep2, fr_ex, ref_ex, eex, fr_p10, ref_p10, ep10);
+    }
+
+    printf("\n");
+    err_print(&e_pow2,  "FR_pow2 ");
+    err_print(&e_exp,   "FR_EXP  ");
+    err_print(&e_pow10, "FR_POW10");
+
+    /* -------------------------------------------------------------- */
+    /* Square root: FR_sqrt                                           */
+    /* -------------------------------------------------------------- */
+    section("Square root (FR_sqrt)");
+
+    double sqrt_inputs[] = {0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 7.0,
+                            9.0, 10.0, 16.0, 25.0, 50.0, 64.0, 100.0};
+    int n_sqrt = (int)(sizeof(sqrt_inputs) / sizeof(sqrt_inputs[0]));
+
+    err_stats_t e_sqrt;
+    err_reset(&e_sqrt);
+
+    printf("  %-10s | %-14s %-14s %-8s\n",
+           "input", "FR_sqrt", "ref_sqrt", "err%");
+    printf("  %-10s-+-%-14s-%-14s-%-8s\n",
+           "----------", "--------------", "--------------", "--------");
+
+    for (int i = 0; i < n_sqrt; i++) {
+        double x = sqrt_inputs[i];
+        s32 xfr = (s32)(x * (1 << R));
+        double fr_sq = FR2D(FR_sqrt(xfr, R), R);
+        double ref_sq = sqrt(x);
+        double esq = pct_err(fr_sq, ref_sq);
+        err_add(&e_sqrt, esq);
+        printf("  %-10.4f | %14.6f %14.6f %7.3f%%\n",
+               x, fr_sq, ref_sq, esq);
+    }
+
+    printf("\n");
+    err_print(&e_sqrt, "FR_sqrt ");
+
+    printf("\n--- end ---\n");
+    return 0;
+}
diff --git a/examples/posix-example/Makefile b/examples/posix-example/Makefile
new file mode 100644
index 0000000..ebb3213
--- /dev/null
+++ b/examples/posix-example/Makefile
@@ -0,0 +1,29 @@
+# posix-example — self-contained build
+# All artifacts stay in this directory.
+#
+# Usage:
+#   make          Build ./FR_Math_Example1 (default goal `all`)
+#   make run      Build if out of date, then execute ./FR_Math_Example1
+#   make clean    Remove the binary and any *.o / *.gcda / *.gcno files here
+
+CC  ?= gcc
+CXX ?= g++
+SRC_DIR = ../../src
+# Note: no rule below references $(CC); all sources are compiled with $(CXX).
+CXXFLAGS = -I$(SRC_DIR) -Wall -Wextra -Wshadow -Os
+LDFLAGS  = -lm
+
+TARGET = FR_Math_Example1
+
+.PHONY: all run clean
+
+all: $(TARGET)
+# Compile and link in a single $(CXX) invocation ($^ = all three prerequisites).
+$(TARGET): FR_Math_Example1.cpp $(SRC_DIR)/FR_math.c $(SRC_DIR)/FR_math_2D.cpp
+	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
+
+run: $(TARGET)
+	./$(TARGET)
+
+clean:
+	rm -f $(TARGET) *.o *.gcda *.gcno
diff --git a/examples/posix-example/README.md b/examples/posix-example/README.md
new file mode 100644
index 0000000..0c3bb4e
--- /dev/null
+++ b/examples/posix-example/README.md
@@ -0,0 +1,46 @@
+# POSIX Example — Comprehensive FR_Math Demo
+
+A full-featured desktop demo that exercises most of the FR_Math library
+including fixed-point arithmetic, trig functions, error statistics,
+2D matrix transforms, and formatted printing.
+
+## What it demonstrates
+
+| Topic | Functions / macros used |
+|-------|----------------------|
+| Overflow in 8-bit arithmetic | Raw C multiply showing wrap-around |
+| Radix interpretation | `FR2D`, printing same integer at radixes 0-14 |
+| Addition (with saturation) | `FR_FixAddSat` |
+| Multiplication (with saturation) | `FR_FixMulSat` |
+| 2D matrix transforms | `FR_Matrix2D_CPT`: translate, rotate, inverse, `XFormPtI`, `XFormPtI16` |
+| Radix precision effects | Comparing radix 6 vs 11 for round-trip accuracy |
+| Forward trig (optional) | `FR_CosI`, `FR_SinI`, `FR_TanI` sweep with error stats |
+| Radian trig (optional) | `FR_cos` radian-native path |
+| Inverse trig (optional) | `FR_acos` |
+| Power / log (optional) | `FR_pow2`, `FR_EXP`, `FR_POW10` sweep |
+| Formatted printing | `FR_printNumF` with `putchar` callback, `FR_CEIL`, `FR_FLOOR` |
+
+Several test sections are gated by flags (`gTestForwardTrig`, etc.) near
+the top of the file. Edit them to enable additional sweeps.
+
+## Building
+
+```bash
+make            # compiles FR_Math_Example1
+make run        # compiles and runs
+make clean      # removes build artifacts
+```
+
+Or compile manually:
+
+```bash
+g++ -I../../src -Wall -Os \
+    FR_Math_Example1.cpp ../../src/FR_math.c ../../src/FR_math_2D.cpp \
+    -lm -o FR_Math_Example1
+```
+
+## Dependencies
+
+- A C++ compiler (g++ or clang++)
+- FR_Math source (`../../src/FR_math.c`, `../../src/FR_math.h`, `../../src/FR_defs.h`, `../../src/FR_math_2D.cpp`, `../../src/FR_math_2D.h`)
+- Standard C math library (`-lm`)
diff --git a/examples/trig-accuracy/Makefile b/examples/trig-accuracy/Makefile
new file mode 100644
index 0000000..f81e4b0
--- /dev/null
+++ b/examples/trig-accuracy/Makefile
@@ -0,0 +1,57 @@
+# trig-accuracy — self-contained build
+# All artifacts stay in this directory.
+#
+# Requires libfixmath source at ../../compare_lfm/libfixmath/libfixmath/
+#
+# Usage:
+#   make          Build the example (fails if libfixmath not found)
+#   make run      Build and run (performs the same libfixmath check first)
+#   make clean    Remove build artifacts
+
+CC  ?= gcc
+CXX ?= g++
+SRC_DIR = ../../src
+LFM_DIR = ../../compare_lfm/libfixmath/libfixmath
+
+CXXFLAGS = -I$(SRC_DIR) -I$(LFM_DIR) -Wall -Wextra -Wshadow -Os
+CFLAGS   = -I$(SRC_DIR) -I$(LFM_DIR) -Wall -Wextra -Wshadow -Os
+LDFLAGS  = -lm
+
+TARGET = trig_accuracy
+
+# libfixmath .c sources needed for sin/cos/tan
+LFM_SRCS = $(LFM_DIR)/fix16.c $(LFM_DIR)/fix16_trig.c $(LFM_DIR)/fix16_sqrt.c $(LFM_DIR)/fix16_exp.c
+LFM_OBJS = fix16.o fix16_trig.o fix16_sqrt.o fix16_exp.o
+
+.PHONY: all run clean check-lfm
+
+all: check-lfm $(TARGET)
+# Print a readable error and stop when the libfixmath header is missing.
+check-lfm:
+	@if [ ! -f $(LFM_DIR)/fix16.h ]; then \
+		echo "ERROR: libfixmath not found at $(LFM_DIR)"; \
+		echo "This example requires libfixmath source."; \
+		echo "Clone it into compare_lfm/libfixmath/ from the repo root."; \
+		exit 1; \
+	fi
+# libfixmath objects are compiled with $(CC) into this directory.
+fix16.o: $(LFM_DIR)/fix16.c
+	$(CC) $(CFLAGS) -c $< -o $@
+
+fix16_trig.o: $(LFM_DIR)/fix16_trig.c
+	$(CC) $(CFLAGS) -c $< -o $@
+
+fix16_sqrt.o: $(LFM_DIR)/fix16_sqrt.c
+	$(CC) $(CFLAGS) -c $< -o $@
+
+fix16_exp.o: $(LFM_DIR)/fix16_exp.c
+	$(CC) $(CFLAGS) -c $< -o $@
+
+$(TARGET): trig_accuracy.cpp $(SRC_DIR)/FR_math.c $(LFM_OBJS)
+	$(CXX) $(CXXFLAGS) trig_accuracy.cpp $(SRC_DIR)/FR_math.c $(LFM_OBJS) $(LDFLAGS) -o $@
+# check-lfm first, so a missing libfixmath gives the message above.
+run: check-lfm $(TARGET)
+	./$(TARGET)
+
+clean:
+	rm -f $(TARGET) *.o *.gcda *.gcno
diff --git a/examples/trig-accuracy/README.md b/examples/trig-accuracy/README.md
new file mode 100644
index 0000000..8406c62
--- /dev/null
+++ b/examples/trig-accuracy/README.md
@@ -0,0 +1,80 @@
+# Trig Accuracy — FR_Math vs libfixmath
+
+Head-to-head trig accuracy comparison between FR_Math and
+[libfixmath](https://github.com/PetteriAimonen/libfixmath),
+using IEEE 754 double-precision as the reference.
+
+## What it demonstrates
+
+- Sweeps sin, cos, and tan over 0-360 degrees in 1-degree steps
+- FR_Math calls: `FR_SinI(deg)`, `FR_CosI(deg)`, `FR_TanI(deg)` (integer degrees, s15.16 output)
+- libfixmath calls: `fix16_sin`, `fix16_cos`, `fix16_tan` (fix16_t radians, Q16.16 output)
+- Reference: `sin()`, `cos()`, `tan()` from `<cmath>` (IEEE 754 double)
+
+## Output tables
+
+**Detail table** (one row per degree):
+
+```
+  deg | FR_sin    LFM_sin   ref_sin    FR_err%  LFM_err% | (same for cos) | (same for tan)
+```
+
+**Summary table**:
+
+```
+  function   | FR_max%      FR_avg%      | LFM_max%     LFM_avg%
+  -----------+--------------------------+---------------------------
+  sin        | ...          ...          | ...          ...
+  cos        | ...          ...          | ...          ...
+  tan        | ...          ...          | ...          ...
+```
+
+## Building
+
+This example requires the libfixmath source tree at
+`../../compare_lfm/libfixmath/libfixmath/`.
+
+```bash
+make            # compiles trig_accuracy (checks for libfixmath)
+make run        # compiles and runs
+make clean      # removes build artifacts
+```
+
+Or compile manually:
+
+```bash
+LFM=../../compare_lfm/libfixmath/libfixmath
+g++ -I../../src -I$LFM -Wall -Os \
+    trig_accuracy.cpp ../../src/FR_math.c \
+    $LFM/fix16.c $LFM/fix16_trig.c $LFM/fix16_sqrt.c $LFM/fix16_exp.c \
+    -lm -o trig_accuracy
+```
+
+## Expected output
+
+```
+FR_Math vs libfixmath — Trig Accuracy Comparison  (v...)
+
+  deg | FR_sin    LFM_sin   ref_sin  ...
+  ----+-------------------------------...
+    0 |  0.00000  0.00000  0.00000  ...
+    1 |  0.01745  0.01745  0.01745  ...
+  ...
+  360 |  0.00000  0.00000 -0.00000  ...
+
+  ============================================================
+  Summary
+  ============================================================
+
+  function   | FR_max%      FR_avg%      | LFM_max%     LFM_avg%
+  ...
+
+--- end ---
+```
+
+## Dependencies
+
+- A C++ compiler (g++ or clang++)
+- FR_Math source (`../../src/FR_math.c`, `../../src/FR_math.h`, `../../src/FR_defs.h`)
+- libfixmath source at `../../compare_lfm/libfixmath/libfixmath/`
+- Standard C math library (`-lm`)
diff --git a/examples/trig-accuracy/trig_accuracy b/examples/trig-accuracy/trig_accuracy
new file mode 100755
index 0000000..828f4ac
Binary files /dev/null and b/examples/trig-accuracy/trig_accuracy differ
diff --git a/examples/trig-accuracy/trig_accuracy.cpp b/examples/trig-accuracy/trig_accuracy.cpp
new file mode 100644
index 0000000..189367a
--- /dev/null
+++ b/examples/trig-accuracy/trig_accuracy.cpp
@@ -0,0 +1,139 @@
+/**
+ * trig_accuracy.cpp — FR_math vs libfixmath vs IEEE double trig comparison
+ *
+ * Sweeps 0-360 degrees in 1-degree steps for sin, cos, tan.
+ * Prints a per-degree detail table and a summary table.
+ *
+ * Requires libfixmath source at compare_lfm/libfixmath/libfixmath/.
+ * Build:  make ex-trig-accuracy   (from the repo root; skipped if libfixmath is absent)
+ * Run:    make run                (from examples/trig-accuracy/)
+ *
+ * Copyright (C) 2001-2026 M. A. Chatterjee — zlib license (see FR_math.h)
+ */
+
+#include <stdio.h>
+#include <cmath>
+
+#include "FR_defs.h"
+#include "FR_math.h"
+#include "fixmath.h"
+
+static double pct_err(double measured, double ref)
+{
+    /* Signed percent error relative to ref; when |ref| < 1e-12 report |measured| * 100 instead. */
+    const bool ref_is_zero = fabs(ref) < 1e-12;
+    return ref_is_zero ? fabs(measured) * 100.0 : ((measured - ref) / ref) * 100.0;
+}
+
+typedef struct {
+    double max_abs_pct;   /* largest |percent error| seen so far */
+    double sum_abs_pct;   /* running total of |percent error| */
+    int    n;             /* number of samples accumulated */
+} err_stats_t;
+
+static void err_reset(err_stats_t *e) { e->n = 0; e->sum_abs_pct = 0; e->max_abs_pct = 0; }
+
+static void err_add(err_stats_t *e, double pct)
+{
+    const double mag = fabs(pct);   /* statistics track magnitude only */
+    e->n += 1;
+    e->sum_abs_pct += mag;
+    e->max_abs_pct = (mag > e->max_abs_pct) ? mag : e->max_abs_pct;
+}
+
+/* main: sweep 0..360 deg, print FR_math / libfixmath / double trig per degree, then an error summary. */
+int main()
+{
+    printf("FR_Math vs libfixmath — Trig Accuracy Comparison  (v%s)\n\n", FR_MATH_VERSION);
+
+    err_stats_t fr_sin_e, fr_cos_e, fr_tan_e;
+    err_stats_t lf_sin_e, lf_cos_e, lf_tan_e;
+    err_reset(&fr_sin_e); err_reset(&fr_cos_e); err_reset(&fr_tan_e);
+    err_reset(&lf_sin_e); err_reset(&lf_cos_e); err_reset(&lf_tan_e);
+
+    /* Header */
+    printf("  deg | FR_sin    LFM_sin   ref_sin    FR_err%%  LFM_err%%"
+           " | FR_cos    LFM_cos   ref_cos    FR_err%%  LFM_err%%"
+           " | FR_tan      LFM_tan     ref_tan      FR_err%%   LFM_err%%\n");
+    printf("  ----+");
+    for (int g = 0; g < 3; g++) printf("--------------------------------------------------------------+");
+    printf("\n");
+
+    for (int deg = 0; deg <= 360; deg++) {
+        double rad_d = deg * M_PI / 180.0;
+
+        /* IEEE double reference */
+        double ref_s = sin(rad_d);
+        double ref_c = cos(rad_d);
+        double ref_t = tan(rad_d);
+
+        /* FR_math: integer-degree API, result is s15.16 */
+        double fr_s = (double)FR_SinI(deg) / 65536.0;
+        double fr_c = (double)FR_CosI(deg) / 65536.0;
+        double fr_t = (double)FR_TanI(deg) / 65536.0;
+
+        /* libfixmath: convert degrees to radians as fix16_t */
+        fix16_t lf_rad = fix16_from_dbl(rad_d);
+        double lf_s = fix16_to_dbl(fix16_sin(lf_rad));
+        double lf_c = fix16_to_dbl(fix16_cos(lf_rad));
+        double lf_t = fix16_to_dbl(fix16_tan(lf_rad));
+
+        /* Error calculation */
+        double fse = pct_err(fr_s, ref_s);
+        double fce = pct_err(fr_c, ref_c);
+        double fte = pct_err(fr_t, ref_t);
+        double lse = pct_err(lf_s, ref_s);
+        double lce = pct_err(lf_c, ref_c);
+        double lte = pct_err(lf_t, ref_t);
+
+        /* |ref_t| > 1000 marks a pole: percent error is meaningless there, so keep it out of the tan stats */
+        int tan_pole = (fabs(ref_t) > 1000.0) ? 1 : 0;
+        err_add(&fr_sin_e, fse); err_add(&fr_cos_e, fce);
+        err_add(&lf_sin_e, lse); err_add(&lf_cos_e, lce);
+        if (!tan_pole) { err_add(&fr_tan_e, fte); err_add(&lf_tan_e, lte); }
+
+        printf("  %3d |", deg);
+        printf(" %8.5f %8.5f %8.5f %8.3f%% %8.3f%%",
+               fr_s, lf_s, ref_s, fse, lse);
+        printf(" |");
+        printf(" %8.5f %8.5f %8.5f %8.3f%% %8.3f%%",
+               fr_c, lf_c, ref_c, fce, lce);
+        printf(" |");
+        if (tan_pole)
+            printf(" %10.1f  %10.1f  %10.1f  (pole)    (pole)",
+                   fr_t, lf_t, ref_t);
+        else
+            printf(" %10.5f %10.5f %10.5f %9.3f%% %9.3f%%",
+                   fr_t, lf_t, ref_t, fte, lte);
+        printf("\n");
+    }
+
+    /* Summary table */
+    printf("\n  ============================================================\n");
+    printf("  Summary\n");
+    printf("  ============================================================\n\n");
+    printf("  %-10s | %12s %12s | %12s %12s\n",
+           "function", "FR_max%", "FR_avg%", "LFM_max%", "LFM_avg%");
+    printf("  %-10s-+-%-12s-%-12s-+-%-12s-%-12s\n",
+           "----------", "------------", "------------",
+           "------------", "------------");
+
+    #define SUMMARY_ROW(name, fr_e, lf_e)                                 \
+        printf("  %-10s | %11.4f%% %11.4f%% | %11.4f%% %11.4f%%\n",      \
+               name,                                                       \
+               (fr_e).max_abs_pct,                                         \
+               (fr_e).n > 0 ? (fr_e).sum_abs_pct / (fr_e).n : 0.0,        \
+               (lf_e).max_abs_pct,                                         \
+               (lf_e).n > 0 ? (lf_e).sum_abs_pct / (lf_e).n : 0.0)
+
+    SUMMARY_ROW("sin", fr_sin_e, lf_sin_e);
+    SUMMARY_ROW("cos", fr_cos_e, lf_cos_e);
+    SUMMARY_ROW("tan", fr_tan_e, lf_tan_e);
+
+    printf("\n  FR_math:    FR_SinI/FR_CosI/FR_TanI  (integer degrees, s15.16 output)\n");
+    printf("  libfixmath: fix16_sin/cos/tan         (fix16_t radians, Q16.16 output)\n");
+    printf("  Reference:  IEEE 754 double sin/cos/tan  (tan poles excluded from summary)\n");
+
+    printf("\n--- end ---\n");
+    return 0;
+}
diff --git a/examples/trig-functions/README.md b/examples/trig-functions/README.md
new file mode 100644
index 0000000..bf5b31f
--- /dev/null
+++ b/examples/trig-functions/README.md
@@ -0,0 +1,55 @@
+# Trig Functions — Arduino Example
+
+Demonstrates sin, cos, tan, atan2, and angle-unit conversions using
+integer-only fixed-point math on Arduino.
+
+## What it demonstrates
+
+| Feature | Functions / macros |
+|---------|-------------------|
+| Integer-degree trig | `FR_CosI`, `FR_SinI` (s15.16 output) |
+| BAM angle conversion | `FR_DEG2BAM`, `fr_cos_bam`, `fr_sin_bam` |
+| Radian-native trig | `fr_cos`, `fr_sin`, `fr_tan` (arbitrary input radix) |
+| Inverse trig | `FR_atan2`, `FR_acos` |
+| Angle conversion | `FR_DEG2RAD` (shift-only, no multiply) |
+
+## Hardware
+
+Any Arduino board with a serial port. Output at 9600 baud.
+
+## Building
+
+**Arduino IDE**: Open `trig-functions.ino` from **File > Examples > FR_Math > trig-functions**.
+
+**Arduino CLI**:
+
+```bash
+arduino-cli compile --fqbn arduino:avr:uno examples/trig-functions
+arduino-cli upload  --fqbn arduino:avr:uno -p /dev/ttyACM0 examples/trig-functions
+arduino-cli monitor -p /dev/ttyACM0 --config baudrate=9600
+```
+
+## Expected serial output
+
+```
+=== FR_Math Trigonometry ===
+
+Integer-degree API (s15.16 output):
+  cos(0) = 65536  sin(0) = 0
+  cos(45) = 46341  sin(45) = 46340
+  cos(90) = 0  sin(90) = 65536
+  ...
+
+BAM API:
+  60 deg -> BAM = 10922
+  cos_bam(60) = 32768
+  sin_bam(60) = 56756
+
+Radian API (radix 12):
+  cos(1 rad) = 35419
+  sin(1 rad) = 55117
+  tan(1 rad) = 101994
+  ...
+
+Done.
+```
diff --git a/examples/wave-generators/README.md b/examples/wave-generators/README.md
new file mode 100644
index 0000000..a262185
--- /dev/null
+++ b/examples/wave-generators/README.md
@@ -0,0 +1,57 @@
+# Wave Generators — Arduino Example
+
+Demonstrates square, triangle, sawtooth, PWM, morphing triangle,
+noise, and ADSR envelope generators on Arduino — all integer-only.
+
+## What it demonstrates
+
+| Feature | Functions / macros |
+|---------|-------------------|
+| Phase increment | `FR_HZ2BAM_INC` (Hz + sample rate to BAM step) |
+| Square wave | `fr_wave_sqr` |
+| Triangle wave | `fr_wave_tri` |
+| Sawtooth wave | `fr_wave_saw` |
+| PWM (variable duty) | `fr_wave_pwm` |
+| Morphing triangle | `fr_wave_tri_morph` |
+| LFSR noise | `fr_wave_noise` |
+| ADSR envelope | `fr_adsr_init`, `fr_adsr_trigger`, `fr_adsr_step`, `fr_adsr_release` |
+
+The phase-driven wave functions take a `u16` BAM phase and return `s16` in [-32767, +32767]; `fr_wave_noise` instead takes a pointer to its `u32` LFSR state.
+The ADSR envelope returns `s16` in [0, 32767] (unipolar).
+
+## Hardware
+
+Any Arduino board with a serial port. Output at 9600 baud.
+
+## Building
+
+**Arduino IDE**: Open `wave-generators.ino` from **File > Examples > FR_Math > wave-generators**.
+
+**Arduino CLI**:
+
+```bash
+arduino-cli compile --fqbn arduino:avr:uno examples/wave-generators
+arduino-cli upload  --fqbn arduino:avr:uno -p /dev/ttyACM0 examples/wave-generators
+arduino-cli monitor -p /dev/ttyACM0 --config baudrate=9600
+```
+
+## Expected serial output
+
+```
+=== FR_Math Wave Generators ===
+
+440 Hz @ 8 kHz -> phase_inc = 3604
+
+phase   sqr     tri     saw     pwm75   morph
+0       32767   0       -32767  32767   0
+4096    32767   8192    -24575  32767   32767
+...
+
+Noise (10 samples):
+16214 -8277 32725 ...
+
+ADSR envelope (attack=100, decay=200, sustain=0.75, release=400):
+327 654 981 ...
+
+Done.
+```
diff --git a/examples/waveform-synth/Makefile b/examples/waveform-synth/Makefile
new file mode 100644
index 0000000..33a36a6
--- /dev/null
+++ b/examples/waveform-synth/Makefile
@@ -0,0 +1,33 @@
+# waveform-synth — self-contained build
+# All artifacts stay in this directory.
+#
+# Usage:
+#   make          Build the example
+#   make run      Build and run (ASCII art mode)
+#   make run-csv  Build and run (CSV output)
+#   make clean    Remove build artifacts
+
+CC  ?= gcc
+CXX ?= g++
+SRC_DIR = ../../src
+
+CXXFLAGS = -I$(SRC_DIR) -Wall -Wextra -Wshadow -Os
+LDFLAGS  = -lm
+TARGET = waveform_synth
+
+.PHONY: all run run-csv clean
+
+all: $(TARGET)
+
+# Headers are prerequisites so editing them triggers a rebuild; only the sources are compiled.
+$(TARGET): waveform_synth.cpp $(SRC_DIR)/FR_math.c $(SRC_DIR)/FR_math.h $(SRC_DIR)/FR_defs.h
+	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) $(LDFLAGS) -o $@
+
+run: $(TARGET)
+	./$(TARGET)
+
+run-csv: $(TARGET)
+	./$(TARGET) --csv
+
+clean:
+	rm -f $(TARGET) *.o *.gcda *.gcno
diff --git a/examples/waveform-synth/README.md b/examples/waveform-synth/README.md
new file mode 100644
index 0000000..da8e8ae
--- /dev/null
+++ b/examples/waveform-synth/README.md
@@ -0,0 +1,76 @@
+# Waveform Synth
+
+Generates waveforms using FR_Math's wave generators and ADSR envelope,
+with both ASCII art visualization and CSV output modes.
+
+## What it demonstrates
+
+| Feature | Functions used |
+|---------|--------------|
+| Square wave | `fr_wave_sqr` |
+| Triangle wave | `fr_wave_tri` |
+| Sawtooth wave | `fr_wave_saw` |
+| PWM (75% duty) | `fr_wave_pwm` |
+| Sine wave | `fr_sin_bam` |
+| LFSR noise | `fr_wave_noise` |
+| ADSR envelope | `fr_adsr_init`, `fr_adsr_trigger`, `fr_adsr_step`, `fr_adsr_release` |
+| Amplitude modulation | Sine wave multiplied by ADSR envelope |
+
+## Building
+
+```bash
+make            # compiles waveform_synth
+make run        # compiles and runs (ASCII art)
+make run-csv    # compiles and runs (CSV output)
+make clean      # removes build artifacts
+```
+
+Or compile manually:
+
+```bash
+g++ -I../../src -Wall -Os waveform_synth.cpp ../../src/FR_math.c -lm -o waveform_synth
+```
+
+## Running
+
+**ASCII art mode** (default):
+
+```bash
+./waveform_synth
+```
+
+Renders each waveform as a 64-column x 21-row ASCII plot, plus the
+ADSR envelope and a combined sin * ADSR amplitude-modulation demo.
+
+**CSV mode**:
+
+```bash
+./waveform_synth --csv
+```
+
+Outputs CSV with columns: `sample,sqr,tri,saw,pwm,sin,noise,envelope,combined`.
+Suitable for importing into a spreadsheet or plotting tool.
+
+## Expected output (ASCII mode)
+
+```
+FR_Math — Waveform Synth Demo  (v...)
+  256 samples/cycle, BAM increment = 256
+
+  Square (fr_wave_sqr) (256 samples, showing 64 columns)
+   +max |********...                            |
+      0 |                                       |
+   -max |              ...************************|
+
+  Triangle (fr_wave_tri) ...
+  ...
+  Sin * ADSR (amplitude modulation) ...
+
+--- end ---
+```
+
+## Dependencies
+
+- A C++ compiler (g++ or clang++)
+- FR_Math source (`../../src/FR_math.c`, `../../src/FR_math.h`, `../../src/FR_defs.h`)
+- Standard C math library (`-lm`)
diff --git a/examples/waveform-synth/waveform_synth b/examples/waveform-synth/waveform_synth
new file mode 100755
index 0000000..815fdce
Binary files /dev/null and b/examples/waveform-synth/waveform_synth differ
diff --git a/examples/waveform-synth/waveform_synth.cpp b/examples/waveform-synth/waveform_synth.cpp
new file mode 100644
index 0000000..988433e
--- /dev/null
+++ b/examples/waveform-synth/waveform_synth.cpp
@@ -0,0 +1,206 @@
+/**
+ * waveform_synth.cpp — Waveform generation with ASCII art and CSV output
+ *
+ * Default (no args):  ASCII art rendering of all waveforms + ADSR demo
+ * --csv flag:         CSV output (columns: sample, sqr, tri, saw, pwm, sin, noise, envelope, combined)
+ *
+ * Build:  make ex-waveform              (from the repo root), or `make` in this directory
+ * Run:    ./waveform_synth              (ASCII art)
+ *         ./waveform_synth --csv        (CSV output)
+ *
+ * Copyright (C) 2001-2026 M. A. Chatterjee — zlib license (see FR_math.h)
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "FR_defs.h"
+#include "FR_math.h"
+
+#define NUM_SAMPLES    256
+#define ROWS           21
+#define COLS           64
+#define BAM_INC        (65536 / NUM_SAMPLES)  /* one full cycle in 256 samples */
+#define PWM_DUTY       49152                  /* 75% duty = 49152/65536 */
+
+/* Convert a sample in [-32767, +32767] to a grid row in [0, ROWS-1]; +32767 lands on row 0. */
+static int val_to_row(s16 v)
+{
+    const long offset = (long)v + 32767L;   /* shift so that -32767 becomes 0 */
+    const int  line   = (int)(ROWS - 1) - (int)(offset * (ROWS - 1) / 65534L);
+    if (line < 0) return 0;
+    return (line >= ROWS) ? ROWS - 1 : line;
+}
+
+/* Subsample: map column col to one of n samples, spread evenly (n defaults to one cycle) */
+static int sample_index(int col, int n = NUM_SAMPLES)
+{
+    return (col * n) / COLS;
+}
+
+/* Plot a bipolar s16 buffer of n samples as a ROWS x COLS ASCII grid on stdout */
+static void ascii_wave(const char *title, s16 *buf, int n)
+{
+    printf("\n  %s (%d samples, showing %d columns)\n", title, n, COLS);
+    if (n <= 0) return;   /* empty buffer: nothing to plot */
+
+    /* Build a character grid */
+    char grid[ROWS][COLS + 1];
+    for (int r = 0; r < ROWS; r++) {
+        for (int c = 0; c < COLS; c++)
+            grid[r][c] = ' ';
+        grid[r][COLS] = '\0';
+    }
+
+    /* Place asterisks */
+    for (int c = 0; c < COLS; c++) {
+        int idx = sample_index(c, n);   /* spans the whole buffer; < n because c < COLS */
+        int r = val_to_row(buf[idx]);
+        grid[r][c] = '*';
+    }
+
+    /* Draw with axis labels */
+    for (int r = 0; r < ROWS; r++) {
+        const char *label = "";
+        if (r == 0)             label = "+max";
+        else if (r == ROWS / 2) label = "   0";
+        else if (r == ROWS - 1) label = "-max";
+        printf("  %5s |%s|\n", label, grid[r]);
+    }
+}
+
+/* Plot a unipolar (0..32767) envelope buffer of n samples as a ROWS x COLS ASCII grid */
+static void ascii_envelope(const char *title, s16 *buf, int n)
+{
+    printf("\n  %s (%d samples, showing %d columns)\n", title, n, COLS);
+    if (n <= 0) return;   /* empty buffer: nothing to plot */
+
+    char grid[ROWS][COLS + 1];
+    for (int r = 0; r < ROWS; r++) {
+        for (int c = 0; c < COLS; c++)
+            grid[r][c] = ' ';
+        grid[r][COLS] = '\0';
+    }
+
+    for (int c = 0; c < COLS; c++) {
+        int idx = sample_index(c, n);   /* spans the whole buffer so the release phase is visible */
+        /* Envelope is 0..32767; scale to full grid: treat 0 as -32767 for display */
+        s16 v = (s16)(buf[idx] * 2 - 32767);
+        int r = val_to_row(v);
+        grid[r][c] = '*';
+    }
+
+    for (int r = 0; r < ROWS; r++) {
+        const char *label = "";
+        if (r == 0)             label = " 1.0";
+        else if (r == ROWS / 2) label = " 0.5";
+        else if (r == ROWS - 1) label = " 0.0";
+        printf("  %5s |%s|\n", label, grid[r]);
+    }
+}
+
+/* main: build all waveform/ADSR buffers, then emit CSV (--csv) or ASCII plots (default). */
+int main(int argc, char *argv[])
+{
+    int csv_mode = 0;
+    for (int i = 1; i < argc; i++) {
+        if (strcmp(argv[i], "--csv") == 0) csv_mode = 1;
+    }
+
+    /* Generate waveform buffers */
+    s16 buf_sqr[NUM_SAMPLES], buf_tri[NUM_SAMPLES], buf_saw[NUM_SAMPLES];
+    s16 buf_pwm[NUM_SAMPLES], buf_sin[NUM_SAMPLES], buf_noise[NUM_SAMPLES];
+
+    u32 noise_state = 0xDEADBEEF;
+    u16 phase = 0;
+
+    for (int i = 0; i < NUM_SAMPLES; i++) {
+        /* s15.16 -> s15 approx; clamp like the AM path below, since a +1.0 peak (65536) >> 1 = 32768 wraps s16 */
+        s32 half_sin = fr_sin_bam(phase) >> 1;
+        if (half_sin > 32767)  half_sin = 32767;
+        if (half_sin < -32767) half_sin = -32767;
+        buf_sqr[i]   = fr_wave_sqr(phase);
+        buf_tri[i]   = fr_wave_tri(phase);
+        buf_saw[i]   = fr_wave_saw(phase);
+        buf_pwm[i]   = fr_wave_pwm(phase, PWM_DUTY);
+        buf_sin[i]   = (s16)half_sin;
+        buf_noise[i] = fr_wave_noise(&noise_state);
+        phase += BAM_INC;
+    }
+
+    /* ADSR envelope demo */
+    #define ENV_TOTAL 512
+    s16 buf_env[ENV_TOTAL];
+
+    fr_adsr_t env;
+    fr_adsr_init(&env,
+        64,     /* attack samples */
+        32,     /* decay samples */
+        16384,  /* sustain level s0.15 (50%) */
+        64      /* release samples */
+    );
+
+    fr_adsr_trigger(&env);
+    int release_at = 256;
+    for (int i = 0; i < ENV_TOTAL; i++) {
+        if (i == release_at)
+            fr_adsr_release(&env);
+        buf_env[i] = fr_adsr_step(&env);
+    }
+
+    /* Combined: sin * envelope (amplitude modulation) */
+    #define COMBINED_LEN ENV_TOTAL
+    s16 buf_combined[COMBINED_LEN];
+    phase = 0;
+    for (int i = 0; i < COMBINED_LEN; i++) {
+        s32 sin_val = fr_sin_bam(phase);  /* s15.16 */
+        s32 env_val = (s32)buf_env[i];    /* 0..32767 (s0.15) */
+        /* Multiply: (s15.16 * s0.15) >> 15 = s15.16, then >> 1 for s15 */
+        s32 combined = (sin_val * env_val) >> 16;
+        if (combined > 32767) combined = 32767;
+        if (combined < -32767) combined = -32767;
+        buf_combined[i] = (s16)combined;
+        phase += BAM_INC;
+    }
+
+    if (csv_mode) {
+        /* CSV header */
+        printf("sample,sqr,tri,saw,pwm,sin,noise,envelope,combined\n");
+
+        int max_len = COMBINED_LEN;
+        for (int i = 0; i < max_len; i++) {
+            printf("%d", i);
+            printf(",%d", i < NUM_SAMPLES ? buf_sqr[i]   : 0);
+            printf(",%d", i < NUM_SAMPLES ? buf_tri[i]   : 0);
+            printf(",%d", i < NUM_SAMPLES ? buf_saw[i]   : 0);
+            printf(",%d", i < NUM_SAMPLES ? buf_pwm[i]   : 0);
+            printf(",%d", i < NUM_SAMPLES ? buf_sin[i]   : 0);
+            printf(",%d", i < NUM_SAMPLES ? buf_noise[i] : 0);
+            printf(",%d", buf_env[i]);
+            printf(",%d", buf_combined[i]);
+            printf("\n");
+        }
+    } else {
+        printf("FR_Math — Waveform Synth Demo  (v%s)\n", FR_MATH_VERSION);
+        printf("  %d samples/cycle, BAM increment = %d\n", NUM_SAMPLES, BAM_INC);
+
+        /* Titles are printed through "%s", so a percent sign must be written once, not escaped */
+        ascii_wave("Square (fr_wave_sqr)",       buf_sqr,  NUM_SAMPLES);
+        ascii_wave("Triangle (fr_wave_tri)",     buf_tri,  NUM_SAMPLES);
+        ascii_wave("Sawtooth (fr_wave_saw)",     buf_saw,  NUM_SAMPLES);
+        ascii_wave("PWM 75% (fr_wave_pwm)",      buf_pwm,  NUM_SAMPLES);
+        ascii_wave("Sine (fr_sin_bam)",          buf_sin,  NUM_SAMPLES);
+        ascii_wave("Noise (fr_wave_noise)",      buf_noise, NUM_SAMPLES);
+        printf("\n  ADSR params: attack=64, decay=32, sustain=50%%, release=64\n");
+        printf("  Trigger at sample 0, release at sample %d, total %d samples\n",
+               release_at, ENV_TOTAL);
+        ascii_envelope("ADSR Envelope (fr_adsr)", buf_env, ENV_TOTAL);
+
+        ascii_wave("Sin * ADSR (amplitude modulation)", buf_combined, COMBINED_LEN);
+
+        printf("\n  Tip: run with --csv to get machine-readable output\n");
+    }
+
+    printf("\n--- end ---\n");
+    return 0;
+}
diff --git a/idf_component.yml b/idf_component.yml
index 6a0d030..8097972 100644
--- a/idf_component.yml
+++ b/idf_component.yml
@@ -1,4 +1,4 @@
-version: "2.0.7"
+version: "2.0.8"
 description: "Compact fixed-point math library for embedded systems. Integer-only with caller-selectable radix. Trig, log/exp, sqrt, hypot, wave generators, ADSR, and 2D transforms. Zero dependencies."
 url: "https://github.com/deftio/fr_math"
 repository: "https://github.com/deftio/fr_math.git"
diff --git a/keywords.txt b/keywords.txt
index 1ab2703..3dc5590 100644
--- a/keywords.txt
+++ b/keywords.txt
@@ -14,9 +14,9 @@ fr_adsr_t	KEYWORD1
 FR_FixMuls	KEYWORD2
 FR_FixMulSat	KEYWORD2
 FR_FixAddSat	KEYWORD2
-FR_Cos	KEYWORD2
-FR_Sin	KEYWORD2
-FR_Tan	KEYWORD2
+fr_sin_deg	KEYWORD2
+fr_cos_deg	KEYWORD2
+fr_tan_deg	KEYWORD2
 FR_TanI	KEYWORD2
 FR_acos	KEYWORD2
 FR_asin	KEYWORD2
@@ -35,6 +35,7 @@ FR_printNumH	KEYWORD2
 FR_numstr	KEYWORD2
 fr_cos_bam	KEYWORD2
 fr_sin_bam	KEYWORD2
+fr_tan_bam	KEYWORD2
 fr_cos	KEYWORD2
 fr_sin	KEYWORD2
 fr_tan	KEYWORD2
@@ -99,3 +100,6 @@ FR_SLOG2E	LITERAL1
 FR_SrLOG2E	LITERAL1
 FR_SLOG2_10	LITERAL1
 FR_SrLOG2_10	LITERAL1
+FR_Cos	LITERAL1
+FR_Sin	LITERAL1
+FR_Tan	LITERAL1
diff --git a/library.json b/library.json
index 495a89f..17f9649 100644
--- a/library.json
+++ b/library.json
@@ -1,6 +1,6 @@
 {
     "name": "FR_Math",
-    "version": "2.0.7",
+    "version": "2.0.8",
     "description": "Compact fixed-point math library for embedded systems. Integer-only with caller-selectable radix. Trig, log/exp, sqrt, hypot, wave generators, ADSR, and 2D transforms in 4KB of flash. Zero dependencies.",
     "keywords": [
         "fixed-point",
diff --git a/library.properties b/library.properties
index cd2d953..47dde32 100644
--- a/library.properties
+++ b/library.properties
@@ -1,5 +1,5 @@
 name=FR_Math
-version=2.0.7
+version=2.0.8
 author=M. A. Chatterjee <deftio@deftio.com>
 maintainer=M. A. Chatterjee <deftio@deftio.com>
 sentence=Compact fixed-point math library for embedded systems. 4KB flash, zero dependencies, any radix.
diff --git a/llms.txt b/llms.txt
index a6d254f..5930e4d 100644
--- a/llms.txt
+++ b/llms.txt
@@ -9,7 +9,7 @@ or libraries. Pure C99, zero dependencies beyond `<stdint.h>`.
 - Repository: https://github.com/deftio/fr_math
 - Documentation: https://deftio.github.io/fr_math/
 - License: BSD-2-Clause
-- Version: 2.0.7
+- Version: 2.0.8
 
 ## Key concept: radix parameter
 
@@ -25,9 +25,8 @@ Common radix choices:
 ## Source files
 
 - `src/FR_math.h`   — all public declarations, macros, constants
-- `src/FR_math.c`   — all function implementations (~42KB)
+- `src/FR_math.c`   — all function implementations (trig tables inlined, ~45KB)
 - `src/FR_defs.h`   — type aliases: s8, s16, s32, u8, u16, u32
-- `src/FR_trig_table.h` — precomputed sine table (256 entries)
 - `src/FR_math_2D.h` / `src/FR_math_2D.cpp` — optional 2D transform class (C++)
 
 ## Types
@@ -82,6 +81,7 @@ s32 fr_tan(s32 rad, u16 radix);
 // BAM (Binary Angle Measurement) — u16 where 65536 = 360 degrees:
 s32 fr_cos_bam(u16 bam);
 s32 fr_sin_bam(u16 bam);
+s32 fr_tan_bam(u16 bam);
 
 // Degree API at any radix:
 s32 FR_Cos(s16 deg, u16 radix);
@@ -164,20 +164,24 @@ s32 ex    = FR_EXP(I2FR(1, R), R);        // e^1
 ## Building
 
 ```bash
-make lib       # build static library objects
-make test      # run all test suites
-make examples  # build example program
-make clean     # remove build artifacts
+make lib           # build static library objects
+make test          # run all test suites
+make examples      # build example programs
+make size-report   # cross-compile size report (Docker)
+make size-update   # size report + patch doc files
+make clean         # remove build artifacts
 ```
 
 ## Lean build options
 
 Define before including FR_math.h to exclude optional subsystems:
 
+- `FR_CORE_ONLY` — shorthand for FR_NO_PRINT + FR_NO_WAVES (~1.9 KB saved)
 - `FR_NO_PRINT` — removes FR_printNumF/D/H and FR_numstr (~1.3 KB saved)
 - `FR_NO_WAVES` — removes fr_wave_*, fr_adsr_*, FR_HZ2BAM_INC (~0.6 KB saved)
 
 ## Platform support
 
 Tested on: AVR (Arduino), ARM Cortex-M0/M4, ESP32 (Xtensa), RISC-V,
-x86/x64, MSP430, 68k, 8051. Code size is 4-8KB at -Os on 32-bit targets.
+x86/x64, MSP430, m68k, PowerPC, MIPS32, 68HC11.
+Code size is 3-9 KB at -Os on 32-bit targets (Lean to Full).
diff --git a/makefile b/makefile
index eb8a9fc..196e127 100644
--- a/makefile
+++ b/makefile
@@ -36,7 +36,8 @@ help:
 	@echo "Build targets:"
 	@echo "  all              Build library and examples"
 	@echo "  lib              Build library objects only"
-	@echo "  examples         Build example program"
+	@echo "  examples         Build all example programs"
+	@echo "  run-examples     Build and run all desktop examples"
 	@echo ""
 	@echo "Test targets:"
 	@echo "  test             Run all tests"
@@ -54,9 +55,14 @@ help:
 	@echo "  coverage         Generate coverage report (gcov)"
 	@echo "  coverage-basic   Basic coverage info without lcov"
 	@echo "  coverage-html    HTML coverage report (requires lcov)"
-	@echo "  size-report      Multi-architecture size report"
+	@echo "  size-report      Multi-architecture size report (Docker)"
+	@echo "  size-update      Size report + patch doc files"
 	@echo "  size-simple      Size report for current platform"
 	@echo ""
+	@echo "Tools:"
+	@echo "  tools            Build diagnostic tools"
+	@echo "  trig-neighborhood  Build function neighborhood explorer"
+	@echo ""
 	@echo "Maintenance:"
 	@echo "  clean            Remove build artifacts"
 	@echo "  cleanall         Remove build artifacts and backups"
@@ -87,11 +93,38 @@ $(BUILD_DIR)/FR_math_2D.o: $(SRC_DIR)/FR_math_2D.cpp $(HEADERS)
 
 # Build examples
 .PHONY: examples
-examples: dirs $(BUILD_DIR)/fr_example
+examples: dirs $(BUILD_DIR)/fr_example ex-basics ex-logexp ex-waveform ex-trig-accuracy
 
 $(BUILD_DIR)/fr_example: $(EXAMPLE_DIR)/posix-example/FR_Math_Example1.cpp $(BUILD_DIR)/FR_math.o $(BUILD_DIR)/FR_math_2D.o
 	$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
 
+# Self-contained desktop examples (each has its own Makefile)
+.PHONY: ex-basics ex-logexp ex-waveform ex-trig-accuracy run-examples
+
+ex-basics:
+	@$(MAKE) -C $(EXAMPLE_DIR)/fixed-point-basics
+
+ex-logexp:
+	@$(MAKE) -C $(EXAMPLE_DIR)/log-exp-curves
+
+ex-waveform:
+	@$(MAKE) -C $(EXAMPLE_DIR)/waveform-synth
+
+ex-trig-accuracy:
+	@if [ -f compare_lfm/libfixmath/libfixmath/fix16.h ]; then \
+		$(MAKE) -C $(EXAMPLE_DIR)/trig-accuracy; \
+	else \
+		echo "Skipping trig-accuracy (libfixmath not found)"; \
+	fi
+
+run-examples: examples
+	@echo ""; echo "=== fixed-point-basics ===" ; $(MAKE) -s -C $(EXAMPLE_DIR)/fixed-point-basics run
+	@echo ""; echo "=== log-exp-curves ===" ;     $(MAKE) -s -C $(EXAMPLE_DIR)/log-exp-curves run
+	@echo ""; echo "=== waveform-synth ===" ;     $(MAKE) -s -C $(EXAMPLE_DIR)/waveform-synth run
+	@if [ -f compare_lfm/libfixmath/libfixmath/fix16.h ]; then \
+		echo ""; echo "=== trig-accuracy ===" ; $(MAKE) -s -C $(EXAMPLE_DIR)/trig-accuracy run; \
+	fi
+
 # Build and run tests
 .PHONY: test
 test: dirs examples test-basic test-comprehensive test-2d test-overflow test-full test-2d-complete test-tdd
@@ -192,10 +225,15 @@ coverage-html: clean dirs
 	@echo "HTML report: $(COV_DIR)/html/index.html"
 	@genhtml $(COV_DIR)/coverage.info --output-directory $(COV_DIR)/html
 
-# Size report - multi-architecture
+# Size report - multi-architecture (Docker cross-compilation)
 .PHONY: size-report
 size-report: dirs
-	@scripts/size_report.sh
+	@scripts/crossbuild_sizes.sh
+
+# Size report + patch doc files
+.PHONY: size-update
+size-update: dirs
+	@scripts/crossbuild_sizes.sh --update
 
 # Simple size report for current platform
 .PHONY: size-simple
@@ -211,14 +249,62 @@ size-simple: lib
 		ls -lh $(BUILD_DIR)/*.o; \
 	fi
 
+# Lean build: only functions with libfixmath equivalents (radian trig,
+# inverse trig, sqrt, log2, ln, exp, mul/div — no degree trig, no BAM
+# tan, no waves, no hypot exact, no log10).
+.PHONY: size-lean
+size-lean: dirs
+	@echo "=== LEAN Build (FR_LEAN — libfixmath-equivalent API only) ==="
+	@$(CC) -I$(SRC_DIR) $(LIB_WARN) -DFR_LEAN -DFR_NO_PRINT -Os -c $(SRC_DIR)/FR_math.c -o $(BUILD_DIR)/FR_math_lean.o
+	@size $(BUILD_DIR)/FR_math_lean.o
+	@echo ""
+
+# Full build: everything (default — all trig, waves, ADSR, print, etc.)
+.PHONY: size-full
+size-full: dirs
+	@echo "=== FULL Build (all features) ==="
+	@$(CC) -I$(SRC_DIR) $(LIB_WARN) -Os -c $(SRC_DIR)/FR_math.c -o $(BUILD_DIR)/FR_math_full.o
+	@size $(BUILD_DIR)/FR_math_full.o
+	@echo ""
+
+# Side-by-side lean vs full size comparison
+.PHONY: size-compare
+size-compare: size-lean size-full
+	@echo "=== Lean vs Full Comparison ==="
+	@LEAN=$$(size $(BUILD_DIR)/FR_math_lean.o | tail -1 | awk '{print $$1}'); \
+	 FULL=$$(size $(BUILD_DIR)/FR_math_full.o | tail -1 | awk '{print $$1}'); \
+	 echo "  Lean text: $${LEAN} bytes"; \
+	 echo "  Full text: $${FULL} bytes"
+
+# Tools
+TOOLS_DIR = tools
+
+.PHONY: tools
+tools: dirs trig-neighborhood
+
+.PHONY: trig-neighborhood
+trig-neighborhood: $(BUILD_DIR)/trig_neighborhood
+
+$(BUILD_DIR)/trig_neighborhood: $(TOOLS_DIR)/trig_neighborhood.cpp $(SRC_DIR)/FR_math.c $(HEADERS)
+	$(CC) -I$(SRC_DIR) $(LIB_WARN) -Os -c $(SRC_DIR)/FR_math.c -o $(BUILD_DIR)/tool_FR_math.o
+	$(CXX) $(CXXFLAGS) $(TOOLS_DIR)/trig_neighborhood.cpp $(BUILD_DIR)/tool_FR_math.o $(LDFLAGS) -o $@
+
 # Clean
 .PHONY: clean
 clean:
 	rm -rf $(BUILD_DIR) $(COV_DIR)
-	rm -f *.o *.gcda *.gcno *.exe *.info
+	rm -f *.o *.gcda *.gcno *.gcov *.exe *.info
+
+.PHONY: clean-examples
+clean-examples:
+	@$(MAKE) -s -C $(EXAMPLE_DIR)/posix-example clean 2>/dev/null || true
+	@$(MAKE) -s -C $(EXAMPLE_DIR)/fixed-point-basics clean 2>/dev/null || true
+	@$(MAKE) -s -C $(EXAMPLE_DIR)/log-exp-curves clean 2>/dev/null || true
+	@$(MAKE) -s -C $(EXAMPLE_DIR)/waveform-synth clean 2>/dev/null || true
+	@$(MAKE) -s -C $(EXAMPLE_DIR)/trig-accuracy clean 2>/dev/null || true
 
 .PHONY: cleanall
-cleanall: clean
+cleanall: clean clean-examples
 	rm -f *~ $(SRC_DIR)/*~ $(TEST_DIR)/*~
 
 # Basic coverage info without lcov
@@ -233,7 +319,7 @@ coverage-basic: clean dirs
 	@echo ""
 	@echo "=== Basic Coverage Info ==="
 	@if command -v gcov >/dev/null 2>&1; then \
-		gcov $(SRC_DIR)/FR_math.c -o $(BUILD_DIR) | grep -E "File|Lines executed"; \
+		cd $(BUILD_DIR) && gcov FR_math.o | grep -E "File|Lines executed"; \
 		echo ""; \
 		echo "For detailed coverage report, install lcov and run: make coverage"; \
 	else \
diff --git a/pages/assets/site.js b/pages/assets/site.js
index a686d8e..0a83734 100644
--- a/pages/assets/site.js
+++ b/pages/assets/site.js
@@ -4,6 +4,9 @@
    into every page so there's exactly one source of truth for the
    site title, version, menu, and legal line.
 
+   Version is loaded from version.json (generated by sync_version.sh)
+   so there is no hardcoded version string in this file.
+
    Page skeleton expected in each HTML file:
 
        <header id="site-header"></header>
@@ -16,8 +19,6 @@
    ════════════════════════════════════════════════════════════════════ */
 
 (function () {
-    var FR_VERSION = 'v2.0.7';
-
     // Detect whether we're a top-level page or inside guide/.
     // Works for both file:// and http(s):// because we look for the
     // literal "/guide/" segment anywhere in the path.
@@ -43,7 +44,7 @@
     ];
 
     // -----------------------------------------------------------------
-    // Build header
+    // Build header (version placeholder filled in after fetch)
     // -----------------------------------------------------------------
     var headerEl = document.getElementById('site-header');
     if (headerEl) {
@@ -60,12 +61,34 @@
         headerEl.innerHTML =
             '<div class="wrapper">' +
               '<a class="site-title" href="' + prefix + 'index.html">' +
-                'FR_Math <span class="site-version">' + FR_VERSION + '</span>' +
+                'FR_Math <span class="site-version" id="site-version"></span>' +
               '</a>' +
               '<nav class="site-nav">' + navHtml + '</nav>' +
             '</div>';
     }
 
+    // -----------------------------------------------------------------
+    // Load version from version.json (generated by sync_version.sh)
+    // Falls back gracefully: on file:// or a missing file the version tag is left blank.
+    // -----------------------------------------------------------------
+    var versionUrl = prefix + 'version.json';
+    try {
+        var xhr = new XMLHttpRequest();
+        xhr.open('GET', versionUrl, true);
+        xhr.onload = function () {
+            if (xhr.status === 200 || xhr.status === 0) {  // 0 for file://
+                try {
+                    var data = JSON.parse(xhr.responseText);
+                    var el = document.getElementById('site-version');
+                    if (el && data.version) {
+                        el.textContent = 'v' + data.version;
+                    }
+                } catch (e) { /* malformed JSON — leave blank */ }
+            }
+        };
+        xhr.send();
+    } catch (e) { /* XMLHttpRequest blocked (e.g. file:// CORS) — leave blank */ }
+
     // -----------------------------------------------------------------
     // Build footer
     // -----------------------------------------------------------------
diff --git a/pages/guide/api-reference.html b/pages/guide/api-reference.html
index 5c03c66..fb67159 100644
--- a/pages/guide/api-reference.html
+++ b/pages/guide/api-reference.html
@@ -5,7 +5,7 @@
   <meta http-equiv="X-UA-Compatible" content="IE=edge">
   <meta name="viewport" content="width=device-width, initial-scale=1">
   <title>API Reference — FR_Math</title>
-  <meta name="description" content="Complete FR_Math API reference: inputs, outputs, radix, precision, and saturation behaviour for every public symbol.">
+  <meta name="description" content="Complete FR_Math API reference: inputs, outputs, radix, precision, and saturation behavior for every public symbol.">
   <link rel="stylesheet" href="../assets/main.css">
 </head>
 <body>
@@ -18,7 +18,7 @@
 <h1>API Reference</h1>
 
 <p>Every public symbol, grouped by topic. Each entry lists the radix
-convention, the precision, and the error / saturation behaviour. All
+convention, the precision, and the error / saturation behavior. All
 types are from <code>FR_defs.h</code>: <code>s8 s16 s32 s64</code> for
 signed and <code>u8 u16 u32 u64</code> for unsigned integers (these are
 aliases for the <code>&lt;stdint.h&gt;</code> types).</p>
@@ -29,7 +29,7 @@ <h2>Reading this reference</h2>
 <strong>radix handling</strong> and <strong>precision</strong>
 separately, because in a mixed-radix library those four things are
 what actually lets you plan an arithmetic pipeline without hidden
-quantisation. If you are new to fixed-point, the
+quantization. If you are new to fixed-point, the
 <a href="fixed-point-primer.html">Fixed-Point Primer</a> explains the
 notation first; come back here once you&rsquo;re comfortable reading
 <code>s15.16</code> and <code>s0.15</code>.</p>
@@ -101,6 +101,8 @@ <h3>Sentinel return values (<code>FR_defs.h</code>)</h3>
 <tr><td><code>FR_OVERFLOW_POS</code></td><td><code>0x7FFFFFFF</code> (<code>INT32_MAX</code>)</td><td>Saturating ops when the true result exceeds <code>+2<sup>31</sup></code>.</td></tr>
 <tr><td><code>FR_OVERFLOW_NEG</code></td><td><code>0x80000000</code> (<code>INT32_MIN</code>)</td><td>Saturating ops when the true result is below <code>&minus;2<sup>31</sup></code>.</td></tr>
 <tr><td><code>FR_DOMAIN_ERROR</code></td><td><code>0x80000000</code> (<code>INT32_MIN</code>)</td><td>Functions with an invalid input, e.g. <code>FR_sqrt(-1)</code>, <code>FR_log2(0)</code>, <code>FR_asin(2.0)</code>. <strong>Shares the bit pattern of <code>FR_OVERFLOW_NEG</code></strong>, so don&rsquo;t mix a <code>&le; FR_OVERFLOW_NEG</code> check with a domain check &mdash; test for the exact sentinel.</td></tr>
+<tr><td><code>FR_TRIG_MAXVAL</code></td><td><code>0x7FFFFFFF</code> (<code>INT32_MAX</code>)</td><td>Tangent saturation ceiling. Returned by <code>fr_tan_bam</code>, <code>fr_tan</code>, <code>fr_tan_deg</code>, and <code>FR_TanI</code> when the angle is near a pole (90&deg; + k&middot;180&deg;).</td></tr>
+<tr><td><code>FR_TRIG_MINVAL</code></td><td><code>-FR_TRIG_MAXVAL</code></td><td>Tangent saturation floor. Negative-side pole saturation.</td></tr>
 </tbody>
 </table>
 
@@ -162,7 +164,7 @@ <h3>Integer &harr; fixed-point</h3>
   <td><code>FR_INT(x, r)</code></td>
   <td><code>x</code>: fixed-point at radix <code>r</code></td>
   <td>integer</td>
-  <td>Truncates toward <strong>zero</strong>. <code>FR_INT(-1, 4) == 0</code>. Useful when you want C&rsquo;s normal integer-cast behaviour.</td>
+  <td>Truncates toward <strong>zero</strong>. <code>FR_INT(-1, 4) == 0</code>. Useful when you want C&rsquo;s normal integer-cast behavior.</td>
 </tr>
 <tr>
   <td><code>FR_NUM(i, f, d, r)</code></td>
@@ -316,7 +318,7 @@ <h2>Utility macros</h2>
   <td><code>FR_DIV_TRUNC(x, xr, y, yr)</code></td>
   <td><code>x</code>: numerator at radix <code>xr</code>; <code>y</code>: denominator at radix <code>yr</code></td>
   <td><code>((s64)(x) &lt;&lt; (yr)) / (s32)(y)</code></td>
-  <td>Truncating division (rounds toward zero). This was the behaviour of <code>FR_DIV</code> in v2.0.0; use it when you need exact backward compatibility or when the truncation bias is acceptable.</td>
+  <td>Truncating division (rounds toward zero). This was the behavior of <code>FR_DIV</code> in v2.0.0; use it when you need exact backward compatibility or when the truncation bias is acceptable.</td>
 </tr>
 <tr>
   <td><code>FR_DIV32(x, xr, y, yr)</code></td>
@@ -335,7 +337,7 @@ <h2>Utility macros</h2>
 
 <h2>Arithmetic</h2>
 
-<p>FR_Math splits arithmetic into three flavours. The
+<p>FR_Math splits arithmetic into three flavors. The
 <strong>macros</strong> (<code>FR_ADD</code>, <code>FR_SUB</code>)
 are mixed-radix, inline, and wrap on overflow. The <strong>s.16
 helper functions</strong> (<code>FR_FixMuls</code>,
@@ -507,7 +509,7 @@ <h3>Why <code>u16</code> for BAM (not <code>s32</code>)?</h3>
 
 <p>&ldquo;But what if I want to pass in any signed angle without
 worrying about conversion?&rdquo; That is exactly what
-<code>FR_CosI(deg)</code>, <code>FR_Cos(deg, radix)</code>, and
+<code>FR_CosI(deg)</code>, <code>fr_cos_deg(deg, radix)</code>, and
 <code>fr_cos(rad, radix)</code> are for. All three take
 <em>signed</em> inputs and reduce them to BAM for you. The only
 place you actually see a <code>u16</code> is at the internal
@@ -564,7 +566,7 @@ <h4>Worked example: keeping precision on chips without a multiplier</h4>
 hand-written DSP inner loop &mdash; and the answer has at most
 &plusmn;0.5 LSB of truncation error. The same discipline applies to
 the other direction: in <code>FR_DEG2BAM</code> the divide-by-360 is
-a compile-time constant, so any optimising compiler folds it into a
+a compile-time constant, so any optimizing compiler folds it into a
 multiply-by-reciprocal (or, on a weaker toolchain, a runtime call
 that you can inline yourself).</p>
 
@@ -614,6 +616,7 @@ <h3>BAM-native (the core)</h3>
 <tbody>
 <tr><td><code>fr_cos_bam</code></td><td><code>s32 fr_cos_bam(u16 bam)</code></td><td>s15.16, range [&minus;65536, +65536]. Exact at cardinal angles.</td></tr>
 <tr><td><code>fr_sin_bam</code></td><td><code>s32 fr_sin_bam(u16 bam)</code></td><td>s15.16, range [&minus;65536, +65536]. Exact at cardinal angles. Defined as <code>fr_cos_bam(bam &minus; FR_BAM_QUADRANT)</code>.</td></tr>
+<tr><td><code>fr_tan_bam</code></td><td><code>s32 fr_tan_bam(u16 bam)</code></td><td>s15.16. Uses a 65-entry octant table for [0, 45&deg;] and the reciprocal identity <code>tan(x) = 1/tan(90&deg;&minus;x)</code> for (45&deg;, 90&deg;). Saturates to <code>&plusmn;FR_TRIG_MAXVAL</code> at the poles (90&deg;, 270&deg;). Returns exact 0 at 0&deg; and 180&deg;. No 64-bit intermediates; one 32-bit division only in the &gt;45&deg; path.</td></tr>
 </tbody>
 </table>
 
@@ -628,16 +631,18 @@ <h3>Radian-native</h3>
 </tbody>
 </table>
 
-<h3>Integer-degree wrappers (legacy API)</h3>
+<h3>Degree wrappers</h3>
 
-<p>The uppercase legacy API takes an angle in degrees.
+<p>These wrappers take the angle in degrees rather than radians or BAM.
 <code>FR_SinI</code>, <code>FR_CosI</code> and <code>FR_TanI</code>
 take plain integer degrees &mdash; the trailing <em>I</em> denotes
-<em>integer</em>. The variants <em>without</em> the <code>I</code>
-suffix (<code>FR_Sin</code>, <code>FR_Cos</code>, <code>FR_Tan</code>)
-accept a <code>radix</code> argument and treat the degree value as
-<em>fixed-point</em>, so you can pass fractional degrees like
-42.375&deg;.</p>
+<em>integer</em>. The primary degree wrappers with a
+<code>radix</code> argument are <code>fr_sin_deg</code>,
+<code>fr_cos_deg</code>, and <code>fr_tan_deg</code> &mdash; they
+treat the degree value as <em>fixed-point</em>, so you can pass
+fractional degrees like 42.375&deg;. The uppercase names
+<code>FR_Sin</code>, <code>FR_Cos</code>, and <code>FR_Tan</code>
+are legacy aliases that map to the same functions.</p>
 
 <table>
 <thead><tr><th>Symbol</th><th>Signature</th><th>Kind</th></tr></thead>
@@ -645,24 +650,23 @@ <h3>Integer-degree wrappers (legacy API)</h3>
 <tr><td><code>FR_SinI</code></td><td><code>FR_SinI(deg)</code> &rarr; <code>s32</code> (s15.16)</td><td>Macro: <code>fr_sin_bam(FR_DEG2BAM(deg))</code>. Zero-cost inline.</td></tr>
 <tr><td><code>FR_CosI</code></td><td><code>FR_CosI(deg)</code> &rarr; <code>s32</code> (s15.16)</td><td>Macro: <code>fr_cos_bam(FR_DEG2BAM(deg))</code>.</td></tr>
 <tr><td><code>FR_TanI</code></td><td><code>s32 FR_TanI(s16 deg)</code></td><td>Function. Returns at radix 16; saturates to <code>&plusmn;INT32_MAX</code> near 90&deg; / 270&deg;.</td></tr>
-<tr><td><code>FR_Sin</code></td><td><code>s32 FR_Sin(s16 deg, u16 radix)</code></td><td><code>deg</code> is fixed-point at <code>radix</code>. Returns s15.16.</td></tr>
-<tr><td><code>FR_Cos</code></td><td><code>s32 FR_Cos(s16 deg, u16 radix)</code></td><td>Same.</td></tr>
-<tr><td><code>FR_Tan</code></td><td><code>s32 FR_Tan(s16 deg, u16 radix)</code></td><td>Returns at radix 16; saturates to <code>&plusmn;INT32_MAX</code> near 90&deg; / 270&deg;.</td></tr>
+<tr><td><code>fr_sin_deg</code></td><td><code>s32 fr_sin_deg(s32 deg, u16 radix)</code></td><td>Function. <code>deg</code> is fixed-point at <code>radix</code>. Returns s15.16.</td></tr>
+<tr><td><code>fr_cos_deg</code></td><td><code>s32 fr_cos_deg(s32 deg, u16 radix)</code></td><td>Function. Same.</td></tr>
+<tr><td><code>fr_tan_deg</code></td><td><code>s32 fr_tan_deg(s32 deg, u16 radix)</code></td><td>Function. Returns at radix 16; saturates to <code>&plusmn;INT32_MAX</code> near 90&deg; / 270&deg;.</td></tr>
+<tr><td><code>FR_Sin</code></td><td><code>FR_Sin(deg, radix)</code></td><td>Legacy macro alias for <code>fr_sin_deg</code>.</td></tr>
+<tr><td><code>FR_Cos</code></td><td><code>FR_Cos(deg, radix)</code></td><td>Legacy macro alias for <code>fr_cos_deg</code>.</td></tr>
+<tr><td><code>FR_Tan</code></td><td><code>FR_Tan(deg, radix)</code></td><td>Legacy macro alias for <code>fr_tan_deg</code>.</td></tr>
 </tbody>
 </table>
 
-<h3>Degree wrappers on the BAM path</h3>
+<h3>Degree-to-BAM path</h3>
 
-<p>If you&rsquo;re using the lowercase family and want to skip the
-radix entirely, two convenience macros cover pure integer degrees:</p>
-
-<table>
-<thead><tr><th>Macro</th><th>Expansion</th></tr></thead>
-<tbody>
-<tr><td><code>fr_cos_deg(deg)</code></td><td><code>fr_cos_bam(FR_DEG2BAM(deg))</code></td></tr>
-<tr><td><code>fr_sin_deg(deg)</code></td><td><code>fr_sin_bam(FR_DEG2BAM(deg))</code></td></tr>
-</tbody>
-</table>
+<p><code>fr_cos_deg</code>, <code>fr_sin_deg</code>, and
+<code>fr_tan_deg</code> are now functions (not macros). They accept
+a fixed-point degree value with a <code>radix</code> argument,
+convert to BAM internally, and call the BAM core. For plain integer
+degrees with no radix parameter, use <code>FR_CosI</code> /
+<code>FR_SinI</code> / <code>FR_TanI</code> instead.</p>
 
 <h2>Inverse trigonometry</h2>
 
diff --git a/pages/guide/building.html b/pages/guide/building.html
index 10b3739..5ab570f 100644
--- a/pages/guide/building.html
+++ b/pages/guide/building.html
@@ -107,42 +107,41 @@ <h3><code>tools/make_release.sh</code></h3>
 
 <h2>The test suite</h2>
 
-<p>Tests live under <code>tests/</code> and are split into six
+<p>Tests live under <code>tests/</code> and are split into seven
 binaries to keep compile times low:</p>
 
 <table>
 <thead><tr><th>Binary</th><th>What it checks</th></tr></thead>
 <tbody>
-<tr><td><code>test_basic</code></td><td>Radix conversions, <code>FR_ADD</code>, <code>FR_FixMuls</code>, rounding.</td></tr>
-<tr><td><code>test_trig</code></td><td>Integer-degree trig (<code>FR_Sin</code> et al.).</td></tr>
-<tr><td><code>test_trig_radians</code></td><td>Radian / BAM trig and the v2 <code>fr_sin</code> API.</td></tr>
-<tr><td><code>test_log_exp</code></td><td>Log base 2 / ln / log10 and their inverses.</td></tr>
+<tr><td><code>fr_test</code></td><td>Radix conversions, <code>FR_ADD</code>, <code>FR_FixMuls</code>, rounding (legacy harness).</td></tr>
+<tr><td><code>test_comprehensive</code></td><td>Trig (degree, radian, BAM), log/exp, sqrt, hypot.</td></tr>
 <tr><td><code>test_2d</code></td><td>2D transforms, determinants, inverses.</td></tr>
-<tr><td><code>test_full_coverage</code></td><td>Dark-corner cases: overflow sentinels, edge radixes, round-trips.</td></tr>
-<tr><td><code>test_tdd</code></td><td>Characterisation tests pinned to bit-exact reference values.</td></tr>
+<tr><td><code>test_overflow</code></td><td>Overflow sentinels, saturation, edge radixes.</td></tr>
+<tr><td><code>test_full</code></td><td>Full-coverage dark-corner cases and round-trips.</td></tr>
+<tr><td><code>test_2d_complete</code></td><td>Extended 2D: matrix composition, inverse, point transforms.</td></tr>
+<tr><td><code>test_tdd</code></td><td>Characterization tests pinned to bit-exact reference values.</td></tr>
 </tbody>
 </table>
 
-<p>As of v2.0.0 the suite contains <strong>42 tests</strong> across
-those binaries and covers <strong>99%</strong> of the library source.
+<p>The suite covers <strong>96%</strong> of the library source.
 Every public symbol is exercised at least once.</p>
 
 <h3>Running a single binary</h3>
 
-<pre><code class="language-bash">make build/test_basic
-./build/test_basic
+<pre><code class="language-bash">make test-comprehensive
+./build/test_comprehensive
 
 # or all of them at once
 make test</code></pre>
 
 <h3>Running the TDD pins after a change</h3>
 
-<p><code>test_tdd.cpp</code> is a characterisation suite. It records
+<p><code>test_tdd.cpp</code> is a characterization suite. It records
 exact bit patterns for a sample of inputs and fails loudly if those
-patterns drift. Any change that modifies the numerical behaviour of
+patterns drift. Any change that modifies the numerical behavior of
 the library will break this suite &mdash; that&rsquo;s the point.</p>
 
-<p>If you <em>intended</em> to change the numerical behaviour (e.g.
+<p>If you <em>intended</em> to change the numerical behavior (e.g.
 you improved a polynomial approximation), update the pinned values in
 <code>tests/test_tdd.cpp</code> and note the change in
 <code>release_notes.md</code> along with any updates to the
@@ -175,52 +174,68 @@ <h2>Cross-compilation</h2>
 <tr><td>Motorola 68k</td><td><code>m68k-linux-gnu-gcc</code></td><td>Docker.</td></tr>
 <tr><td>Motorola 68HC11</td><td><code>m68hc11-gcc</code></td><td>Docker.</td></tr>
 <tr><td>PowerPC</td><td><code>powerpc-linux-gnu-gcc</code></td><td>Docker.</td></tr>
+<tr><td>MIPS32</td><td><code>mipsel-linux-gnu-gcc</code></td><td>Docker.</td></tr>
 <tr><td>Xtensa LX106 (ESP8266)</td><td><code>xtensa-lx106-elf-gcc</code></td><td>Docker.</td></tr>
+<tr><td>Xtensa LX7 (ESP32-S3)</td><td><code>xtensa-esp-elf-gcc</code></td><td>Docker (Espressif toolchain).</td></tr>
 <tr><td>8051</td><td><code>sdcc</code></td><td>Manual.</td></tr>
 </tbody>
 </table>
 
 <h3>Code size (<code>.text</code> section, compiled with <code>-Os</code>)</h3>
 
-<p>Sizes are for <code>FR_math.c</code> compiled with <code>-Os -ffreestanding</code>.
-Core = compiled with <code>-DFR_CORE_ONLY</code> (math only, no print, no waves).
+<p>Sizes are for <code>FR_math.c</code> compiled with <code>-Os</code>.
+Lean = <code>-DFR_LEAN -DFR_NO_PRINT</code> (radian trig, inverse trig, log/exp, sqrt).
+Core = <code>-DFR_CORE_ONLY</code> (Lean plus degree trig, BAM tan, log10, hypot).
+Full = all features (Core plus print, waves, ADSR).
 With <code>-ffunction-sections</code> and linker <code>--gc-sections</code>,
 only the functions your application references are linked, so real flash
 usage will be smaller.</p>
 
 <!-- SIZE_TABLE_START -->
 <table>
-<thead><tr><th>Target</th><th>Core</th><th>Full</th></tr></thead>
+<thead><tr><th>Target</th><th>Lean</th><th>Core</th><th>Full</th></tr></thead>
 <tbody>
-<tr><td>RP2040 (Cortex-M0+)</td><td>2.6 KB</td><td>4.2 KB</td></tr>
-<tr><td>STM32 (Cortex-M4)</td><td>2.6 KB</td><td>4.2 KB</td></tr>
-<tr><td>RISC-V 32 (rv32imac)</td><td>3.0 KB</td><td>4.7 KB</td></tr>
-<tr><td>ESP32 (Xtensa)</td><td>3.5 KB</td><td>5.2 KB</td></tr>
-<tr><td>68k</td><td>3.5 KB</td><td>5.3 KB</td></tr>
-<tr><td>x86-64 (GCC)</td><td>3.5 KB</td><td>5.7 KB</td></tr>
-<tr><td>x86-32</td><td>4.5 KB</td><td>6.8 KB</td></tr>
-<tr><td>MSP430 (16-bit)</td><td>5.9 KB</td><td>8.9 KB</td></tr>
-<tr><td>68HC11</td><td>10.8 KB</td><td>16.0 KB</td></tr>
-<tr><td>AVR (ATmega328P)</td><td>7.0 KB</td><td>10.6 KB</td></tr>
+<tr><td>Xtensa LX7 (ESP32-S3)</td><td>2.9 KB</td><td>4.2 KB</td><td>5.3 KB</td></tr>
+<tr><td>Cortex-M4 (STM32)</td><td>3.3 KB</td><td>4.4 KB</td><td>5.5 KB</td></tr>
+<tr><td>Cortex-M0 (RP2040)</td><td>3.4 KB</td><td>4.5 KB</td><td>5.7 KB</td></tr>
+<tr><td>ARM Thumb</td><td>3.4 KB</td><td>4.7 KB</td><td>5.9 KB</td></tr>
+<tr><td>RISC-V rv64</td><td>4.0 KB</td><td>5.5 KB</td><td>6.8 KB</td></tr>
+<tr><td>RISC-V rv32</td><td>4.1 KB</td><td>5.5 KB</td><td>6.8 KB</td></tr>
+<tr><td>Xtensa LX106 (ESP8266)</td><td>4.2 KB</td><td>5.8 KB</td><td>7.3 KB</td></tr>
+<tr><td>ARM32</td><td>4.3 KB</td><td>5.8 KB</td><td>7.7 KB</td></tr>
+<tr><td>68k</td><td>4.4 KB</td><td>6.2 KB</td><td>7.8 KB</td></tr>
+<tr><td>MIPS32</td><td>4.7 KB</td><td>6.6 KB</td><td>8.7 KB</td></tr>
+<tr><td>x86-64 (GCC)</td><td>4.6 KB</td><td>6.1 KB</td><td>8.0 KB</td></tr>
+<tr><td>AArch64 (ARM64)</td><td>4.8 KB</td><td>6.6 KB</td><td>8.7 KB</td></tr>
+<tr><td>x86-32</td><td>5.3 KB</td><td>7.2 KB</td><td>9.2 KB</td></tr>
+<tr><td>PowerPC</td><td>5.8 KB</td><td>8.0 KB</td><td>10.4 KB</td></tr>
+<tr><td>MSP430 (16-bit)</td><td>7.8 KB</td><td>10.7 KB</td><td>12.8 KB</td></tr>
+<tr><td>AVR (ATmega328P)</td><td>9.2 KB</td><td>12.8 KB</td><td>15.4 KB</td></tr>
+<tr><td>68HC11</td><td>13.3 KB</td><td>18.4 KB</td><td>22.6 KB</td></tr>
 </tbody>
 </table>
 <!-- SIZE_TABLE_END -->
 
 <h3>Lean build options</h3>
 
-<p>Three compile-time <code>#define</code> guards let you strip optional subsystems
+<p>Compile-time <code>#define</code> guards let you strip optional subsystems
 for ROM-constrained targets. Define them before including
 <code>FR_math.h</code> (or pass <code>-D</code> on the compiler command line):</p>
 
 <table>
 <thead><tr><th>Define</th><th>What it removes</th><th>Typical savings</th></tr></thead>
 <tbody>
-<tr><td><code>FR_CORE_ONLY</code></td><td>Everything below (print + waves)</td><td>~1.9 KB</td></tr>
+<tr><td><code>FR_LEAN</code></td><td>Degree trig, BAM tan, angle converters, <code>FR_log10</code>, <code>FR_hypot</code>, waves + ADSR</td><td>~3.7 KB</td></tr>
+<tr><td><code>FR_CORE_ONLY</code></td><td>Print + waves (shorthand for both below)</td><td>~1.9 KB</td></tr>
 <tr><td><code>FR_NO_PRINT</code></td><td><code>FR_printNumF</code>, <code>FR_printNumD</code>, <code>FR_printNumH</code>, <code>FR_numstr</code></td><td>~1.3 KB</td></tr>
 <tr><td><code>FR_NO_WAVES</code></td><td><code>fr_wave_*</code> (6 shapes), <code>fr_adsr_*</code> (ADSR envelope), <code>FR_HZ2BAM_INC</code></td><td>~0.6 KB</td></tr>
 </tbody>
 </table>
 
+<p><code>FR_LEAN</code> keeps only radian trig (sin, cos, tan), inverse trig, sqrt,
+log2, ln, exp, pow2, and arithmetic &mdash; comparable to libfixmath&rsquo;s API at
+4.7&nbsp;KB text. <code>FR_LEAN</code> implies <code>FR_NO_WAVES</code>.</p>
+
 <p><code>FR_CORE_ONLY</code> is a convenience shorthand that defines both
 <code>FR_NO_PRINT</code> and <code>FR_NO_WAVES</code> in one step.</p>
 
@@ -236,7 +251,7 @@ <h3>Lean build options</h3>
 <p>To regenerate this table, run the Docker cross-build
 (requires the <a href="https://github.com/deftio/xelp">xelp</a> Docker image):</p>
 
-<pre><code class="language-bash">scripts/crossbuild-docker.sh</code></pre>
+<pre><code class="language-bash">scripts/crossbuild_sizes.sh</code></pre>
 
 <h3>Example: RISC-V</h3>
 
diff --git a/pages/guide/examples.html b/pages/guide/examples.html
index 137525b..273fede 100644
--- a/pages/guide/examples.html
+++ b/pages/guide/examples.html
@@ -18,7 +18,7 @@
 <h1>Examples</h1>
 
 <p>Short, runnable snippets for the most common FR_Math tasks. Each
-example compiles cleanly against the v2.0.0 library with:</p>
+example compiles cleanly against the library with:</p>
 
 <pre><code class="language-bash">cc -Isrc example.c src/FR_math.c -o example
 ./example</code></pre>
@@ -70,8 +70,9 @@ <h2>1. Basic radix conversion</h2>
 <h2>2. Trig &mdash; integer degrees vs radian vs BAM</h2>
 
 <p>FR_Math supports three angle conventions and this example hits
-all three: integer degrees through the legacy
-<code>FR_Sin</code> / <code>FR_Cos</code> API, the radian-native
+all three: integer degrees through
+<code>fr_sin_deg</code> / <code>fr_cos_deg</code> (or the legacy
+aliases <code>FR_Sin</code> / <code>FR_Cos</code>), the radian-native
 <code>fr_sin</code> / <code>fr_cos</code> (radian at a chosen
 input radix), and BAM-native <code>fr_sin_bam</code> /
 <code>fr_cos_bam</code>. All three paths feed the same 129-entry
@@ -79,7 +80,7 @@ <h2>2. Trig &mdash; integer degrees vs radian vs BAM</h2>
 identical results.</p>
 
 <p><em>Caveats:</em> the <code>radix</code> parameter on
-<code>FR_Sin(deg, radix)</code> is the radix of the <em>degree
+<code>fr_sin_deg(deg, radix)</code> is the radix of the <em>degree
 input</em>, not the output. All sin/cos functions return
 <strong>s15.16</strong> &mdash; that is, <code>s32</code> at radix 16,
 where 1.0 = 65536 (<code>FR_TRIG_ONE</code>). The values compared
@@ -208,19 +209,16 @@ <h2>4. Logarithm, exponential, decibels</h2>
 <h2>5. Arctangent and atan2</h2>
 
 <p>The inverse-trig functions in FR_Math return angles in
-<strong>degrees</strong>, not radians &mdash; the output fits in
-an <code>s16</code> and you can feed it straight back into
-<code>FR_SinI</code> / <code>FR_CosI</code> without any
-conversion. This example exercises both <code>FR_atan</code>
-(single-argument ratio) and <code>FR_atan2</code> (full-circle,
-two-argument).</p>
-
-<p><em>Caveats:</em> <code>FR_atan2</code> takes only two
-arguments (<code>y</code>, <code>x</code>) and has no radix
-parameter &mdash; it returns degrees in [&minus;180, 180] as
-<code>s16</code>. The <code>radix</code> argument on
-<code>FR_atan</code> is the radix of the <em>input</em> ratio,
-not of the output.</p>
+<strong>radians</strong> at a caller-chosen output radix. This
+example exercises both <code>FR_atan</code> (single-argument
+ratio) and <code>FR_atan2</code> (full-circle, two-argument).</p>
+
+<p><em>Caveats:</em> all inverse-trig functions take an
+<code>out_radix</code> parameter that sets the radix of the
+<em>output</em>. <code>FR_atan2(y, x, out_radix)</code> returns
+radians in [&minus;&pi;, &pi;] as <code>s32</code> at the chosen
+radix. <code>FR_atan(input, radix, out_radix)</code> has
+separate radixes for input and output.</p>
 
 <pre><code class="language-c">#include &lt;stdio.h&gt;
 #include "FR_math.h"
@@ -229,18 +227,19 @@ <h2>5. Arctangent and atan2</h2>
 {
     const u16 r = 14;
 
-    /* atan(1) = 45 degrees */
-    s16 a = FR_atan(I2FR(1, r), r);
-    printf("atan(1) = %d degrees (expect 45)\n", a);
+    /* atan(1) = pi/4 radians &asymp; 0.7854 */
+    s32 a = FR_atan(I2FR(1, r), r, r);
+    printf("atan(1) = %d (radix %d, expect ~%d)\n",
+           (int)a, r, (int)(12868));  /* pi/4 at r14 */
 
     /* Full-circle atan2 */
-    s16 q2 = FR_atan2(I2FR( 1, r), I2FR(-1, r));  /*  135 deg */
-    s16 q3 = FR_atan2(I2FR(-1, r), I2FR(-1, r));  /* -135 deg */
-    printf("atan2( 1,-1) = %d\n", q2);
-    printf("atan2(-1,-1) = %d\n", q3);
+    s32 q2 = FR_atan2(I2FR( 1, r), I2FR(-1, r), r);  /*  3*pi/4 */
+    s32 q3 = FR_atan2(I2FR(-1, r), I2FR(-1, r), r);  /* -3*pi/4 */
+    printf("atan2( 1,-1) = %d (expect ~%d)\n", (int)q2, (int)(38603));
+    printf("atan2(-1,-1) = %d (expect ~%d)\n", (int)q3, (int)(-38603));
 
     /* asin with out-of-domain input */
-    s16 bad = FR_asin(I2FR(2, r), r);
+    s32 bad = FR_asin(I2FR(2, r), r, r);
     if (bad == FR_DOMAIN_ERROR)
         printf("asin(2) rejected, good.\n");
     return 0;
@@ -423,7 +422,7 @@ <h2>10. Integer-only 2D transform for scanline renderers</h2>
 coordinates in and writes <code>s16</code> out. It&rsquo;s a tiny
 bit lossier than the <code>s32</code> form, but it sidesteps all
 the fixed-point conversion on the hot path &mdash; useful inside
-the inner loop of a scanline rasteriser where you already know
+the inner loop of a scanline rasterizer where you already know
 your coordinates fit in 16 bits.</p>
 
 <p><em>Caveats:</em> the output is narrowed to <code>s16</code>,
@@ -510,7 +509,7 @@ <h2>11. String round-trip and radix precision</h2>
         FR_printNumF(buf_putc, val, 16, 0, 8);
         printf("    16     16  0x%08x  %s\n", (unsigned)val, buf);
         /* Expected: "3.14158630" &mdash; good through 5 digits, then
-         * quantisation noise appears.  This is the sweet spot for
+         * quantization noise appears.  This is the sweet spot for
          * most embedded work: 16 bits of fraction fits in an s32
          * with 15 bits of integer range (&plusmn;32767). */
     }
@@ -558,10 +557,61 @@ <h2>11. String round-trip and radix precision</h2>
 so the decimal rendering can only faithfully reproduce about two
 fractional digits. At radix 24 the value is <code>0x03243F6A</code> &mdash; 26
 significant bits &mdash; and seven decimal digits survive. The
-eighth digit (<code>5</code> vs <code>4</code>) shows the quantisation floor:
+eighth digit (<code>5</code> vs <code>4</code>) shows the quantization floor:
 <code>2^&minus;24 &asymp; 6 &times; 10^&minus;8</code>, so the last digit is always
 uncertain.</p>
 
+<h2>Desktop example programs</h2>
+
+<p>In addition to the inline snippets above, the <code>examples/</code> directory
+contains four self-contained desktop programs. Each has its own
+<code>Makefile</code> and <code>README.md</code>; build artifacts stay within the
+example&rsquo;s directory.</p>
+
+<table>
+<thead><tr><th>Directory</th><th>What it does</th></tr></thead>
+<tbody>
+<tr>
+  <td><code>examples/fixed-point-basics/</code></td>
+  <td>Educational walkthrough of radix interpretation, <code>I2FR</code>/<code>FR2I</code>
+      round-trips, <code>FR_NUM</code> constant construction, aligned add/sub,
+      multiply precision, division, saturation, and <code>FR_printNumF</code>
+      formatted output.</td>
+</tr>
+<tr>
+  <td><code>examples/log-exp-curves/</code></td>
+  <td>Sweeps <code>FR_log2</code>, <code>FR_ln</code>, <code>FR_log10</code>,
+      <code>FR_pow2</code>, <code>FR_EXP</code>, <code>FR_POW10</code>, and
+      <code>FR_sqrt</code> against IEEE double reference values, printing
+      per-point and summary error tables.</td>
+</tr>
+<tr>
+  <td><code>examples/waveform-synth/</code></td>
+  <td>Generates square, triangle, sawtooth, PWM, sine, and noise waveforms plus
+      an ADSR envelope and amplitude-modulated combination. Default mode renders
+      ASCII art; <code>--csv</code> mode outputs machine-readable CSV.</td>
+</tr>
+<tr>
+  <td><code>examples/trig-accuracy/</code></td>
+  <td>Head-to-head comparison of FR_Math
+      (<code>FR_SinI</code>/<code>FR_CosI</code>/<code>FR_TanI</code>) vs
+      libfixmath (<code>fix16_sin</code>/<code>fix16_cos</code>/<code>fix16_tan</code>)
+      vs IEEE double over 0&ndash;360 degrees. Requires libfixmath source.</td>
+</tr>
+</tbody>
+</table>
+
+<p>Build all from the repo root:</p>
+
+<pre><code class="language-bash">make examples        # builds all desktop examples
+make run-examples    # builds and runs the first three, plus trig-accuracy if libfixmath is present
+
+<p>Or build any single example from its directory:</p>
+
+<pre><code class="language-bash">cd examples/waveform-synth
+make run             # ASCII art output
+make run-csv         # CSV output</code></pre>
+
 <h2>See also</h2>
 
 <ul>
diff --git a/pages/guide/fixed-point-primer.html b/pages/guide/fixed-point-primer.html
index a4e71c4..73325ef 100644
--- a/pages/guide/fixed-point-primer.html
+++ b/pages/guide/fixed-point-primer.html
@@ -294,7 +294,7 @@ <h2>Notation: sM.N and the radix</h2>
 radix&rdquo;, think of the radix as a <em>type annotation that lives
 in your source code</em>, not a runtime field.</p>
 
-<h2>Quantisation and loss of precision</h2>
+<h2>Quantization and loss of precision</h2>
 
 <p>Fixing the radix also fixes the smallest representable
 fractional step. At radix <em>N</em>, that step is
@@ -302,7 +302,7 @@ <h2>Quantisation and loss of precision</h2>
 the round-trip into the integer. Any real value smaller than the
 step rounds to zero; any real value landing between two adjacent
 steps rounds to one of them. The difference between the ideal
-value and its stored form is called <strong>quantisation
+value and its stored form is called <strong>quantization
 error</strong>, and it is the main price paid for doing
 fractional math in integer registers.</p>
 
@@ -323,7 +323,7 @@ <h2>Quantisation and loss of precision</h2>
 error                         =  0.00000153  (&lt; 0.002 %)
 </code></pre>
 
-<p>This behaviour isn&rsquo;t a bug &mdash; it is the same
+<p>This behavior isn&rsquo;t a bug &mdash; it is the same
 compromise IEEE-754 floating point makes with its mantissa. The
 difference is that a float hides the trade-off behind a variable
 exponent, while fixed-point puts it on a ledger that the
@@ -336,7 +336,7 @@ <h2>Quantisation and loss of precision</h2>
 vanish; any finer and integer headroom is being spent for no
 benefit.</p>
 
-<p>A second consequence worth recording: quantisation error
+<p>A second consequence worth recording: quantization error
 <em>accumulates</em>. Summing a million low-radix values sums
 the errors too. Signal-processing pipelines with long feedback
 paths are the main reason to carry accumulators at a wider radix
@@ -406,7 +406,7 @@ <h2>Displaying a fixed-point value</h2>
 usable on targets without stdio &mdash; a UART write, an LCD
 glyph pusher, a ring-buffer append. The <code>pad</code>
 parameter sets a minimum field width and <code>prec</code> sets
-the number of fractional digits. Rounding behaviour matches the
+the number of fractional digits. Rounding behavior matches the
 hand-rolled version: excess fractional digits are truncated, and
 negative values are handled without the two&rsquo;s-complement
 trap described above.</p>
@@ -415,7 +415,7 @@ <h2>Arithmetic: what the operations actually do</h2>
 
 <p>Once you&rsquo;ve chosen a radix, the everyday operations behave
 almost like integer math &mdash; with one or two twists per
-operation that you just have to internalise. Let&rsquo;s walk
+operation that you just have to internalize. Let&rsquo;s walk
 through them.</p>
 
 <h3>Addition and subtraction</h3>
@@ -489,7 +489,7 @@ <h3>Multiplication</h3>
       doesn&rsquo;t fire. <strong>Rounds to nearest</strong> &mdash;
       adds 0.5&nbsp;LSB before the shift.</li>
   <li><code>FR_FixMulSat(a, b, r)</code> &mdash; same shape with
-      the same round-to-nearest behaviour, but also saturates to
+      the same round-to-nearest behavior, but also saturates to
       <code>FR_OVERFLOW_POS</code> /
       <code>FR_OVERFLOW_NEG</code> if the result wouldn&rsquo;t
       fit. Prefer this one by default unless you&rsquo;ve proven
@@ -555,7 +555,7 @@ <h3>Division</h3>
       division truncates toward zero for both signs, so
       <code>&minus;7 / 2 == &minus;3</code> (not
       <code>&minus;4</code>). Fixed-point division inherits
-      that behaviour. Round-to-nearest can be layered on top by
+      that behavior. Round-to-nearest can be layered on top by
       adding <code>b / 2</code> (for a positive numerator) or
       <code>&minus;b / 2</code> (for a negative numerator) to
       the pre-scaled numerator before the divide.</li>
@@ -581,7 +581,7 @@ <h3>Changing radix</h3>
   <li>Going to a <em>smaller</em> radix &mdash; the low bits are
       dropped. Precision is lost; headroom grows. This is a good
       place to add <code>&plusmn; (1 &lt;&lt; (from_r - to_r - 1))</code>
-      before the shift if you want round-to-nearest behaviour.</li>
+      before the shift if you want round-to-nearest behavior.</li>
 </ul>
 
 <p>The value is conserved as closely as the destination radix can
@@ -644,7 +644,7 @@ <h2>Overflow, saturation, and the sentinels</h2>
 about it, you will eventually pass a pair of inputs whose product
 doesn&rsquo;t fit, and plain C will hand you wrap-around garbage
 with no warning. A signed 32-bit multiply that overflows is not a
-runtime error in C &mdash; it&rsquo;s undefined behaviour that
+runtime error in C &mdash; it&rsquo;s undefined behavior that
 happens to look like data most of the time.</p>
 
 <p>FR_Math defends against this in three layers, and it&rsquo;s
@@ -743,12 +743,12 @@ <h2>Choosing a radix</h2>
 <h2>A worked example: one-pole IIR low-pass filter</h2>
 
 <p>The sections up to this point have introduced the pieces
-individually: scaling, notation, quantisation, arithmetic,
+individually: scaling, notation, quantization, arithmetic,
 overflow, and radix choice. A small end-to-end example is the
 fastest way to see how those pieces fit together on a real
 pipeline. The filter walked through below is a single-pole
 infinite-impulse-response (IIR) low-pass &mdash; about the
-simplest entry in the DSP catalogue, but realistic enough to
+simplest entry in the DSP catalog, but realistic enough to
 exercise nearly every decision the primer has covered so far.</p>
 
 <p>In floating point, the filter is one line of arithmetic:</p>
@@ -790,7 +790,7 @@ <h3>Step 1: inventory the ranges</h3>
       &plusmn;32767 output range. But because it accumulates
       small updates on every sample, it will drift and lose
       precision unless carried at a higher radix than the raw
-      input. This is the quantisation-error accumulation noted
+      input. This is the quantization-error accumulation noted
       earlier in the primer, showing up in practice.</li>
 </ul>
 
@@ -889,7 +889,7 @@ <h3>Step 5: test against the reference</h3>
 exercise the relevant paths &mdash; and reports the worst-case
 delta. For a radix-15 one-pole IIR the expected worst-case
 difference is on the order of a few LSB, comparable to the
-inherent quantisation of the 16-bit output format and not
+inherent quantization of the 16-bit output format and not
 audible in normal listening. Anything substantially larger
 indicates a radix choice that is too tight, a rounding mode
 that is drifting, or a missing int64 promotion on the
@@ -911,8 +911,8 @@ <h2>FR_Math&rsquo;s naming conventions</h2>
 <thead><tr><th>Prefix</th><th>What it is</th><th>Example</th></tr></thead>
 <tbody>
 <tr><td><code>FR_XXX()</code></td><td><code>UPPERCASE</code> macro &mdash; inline, zero call overhead.</td><td><code>FR_ADD</code>, <code>FR_ABS</code>, <code>FR2I</code></td></tr>
-<tr><td><code>FR_Xxx()</code></td><td>Mixed-case C function &mdash; the classic v1 API. Integer-degree trig and related.</td><td><code>FR_Sin</code>, <code>FR_log2</code>, <code>FR_sqrt</code></td></tr>
-<tr><td><code>fr_xxx()</code></td><td>Lowercase C function &mdash; v2 additions (radian / BAM trig, wave generators, ADSR).</td><td><code>fr_sin</code>, <code>fr_wave_tri</code>, <code>fr_adsr_step</code></td></tr>
+<tr><td><code>FR_Xxx()</code></td><td>Mixed-case C function or legacy alias. <code>FR_Sin</code>/<code>FR_Cos</code>/<code>FR_Tan</code> are legacy aliases for <code>fr_sin_deg</code>/<code>fr_cos_deg</code>/<code>fr_tan_deg</code>.</td><td><code>FR_log2</code>, <code>FR_sqrt</code>, <code>FR_Sin</code> (legacy)</td></tr>
+<tr><td><code>fr_xxx()</code></td><td>Lowercase C function &mdash; the current API for degree wrappers, radian / BAM trig, wave generators, ADSR.</td><td><code>fr_sin_deg</code>, <code>fr_cos_deg</code>, <code>fr_sin</code>, <code>fr_wave_tri</code>, <code>fr_adsr_step</code></td></tr>
 <tr><td><code>s8, s16, s32</code></td><td>Signed integer typedefs (aliases for <code>int8_t</code>, <code>int16_t</code>, <code>int32_t</code>).</td><td>&mdash;</td></tr>
 <tr><td><code>u8, u16, u32</code></td><td>Unsigned integer typedefs.</td><td>&mdash;</td></tr>
 </tbody>
@@ -973,14 +973,14 @@ <h2>Angle representations</h2>
 <code>u16</code> wraparound <em>is</em> the angular modulus &mdash;
 that&rsquo;s the whole feature. Adding two <code>u16</code> BAM
 values automatically gives you the right answer modulo a full
-revolution, with zero quantisation error at the boundary and no
+revolution, with zero quantization error at the boundary and no
 <code>% 65536</code> in sight. If BAM were <code>s32</code>, every
 read of the table would have to explicitly mask off the top bits
 (and handle negative values) before the quadrant extraction
 (<code>bam &gt;&gt; 14</code>) made any sense. You would have traded
 one free operation for two slow ones on every sample, just to get
-back the same behaviour. So instead, the public trig entry points
-(<code>FR_CosI</code>, <code>FR_Cos</code>, <code>fr_cos</code>, and
+back the same behavior. So instead, the public trig entry points
+(<code>FR_CosI</code>, <code>fr_cos_deg</code>, <code>fr_cos</code>, and
 friends) <em>all</em> take signed angles &mdash; in degrees,
 fixed-radix degrees, or radians &mdash; and only the internal
 <code>fr_cos_bam</code> / <code>fr_sin_bam</code> primitives see
diff --git a/pages/guide/getting-started.html b/pages/guide/getting-started.html
index b6a22ed..e5076bf 100644
--- a/pages/guide/getting-started.html
+++ b/pages/guide/getting-started.html
@@ -31,8 +31,8 @@ <h2>Install</h2>
 <ul>
   <li>Copy <code>src/FR_math.c</code>, <code>src/FR_math.h</code>,
       <code>src/FR_defs.h</code> (and optionally
-      <code>src/FR_math_2D.cpp</code>, <code>src/FR_math_2D.h</code>,
-      and <code>src/FR_trig_table.h</code>) into the target project, <strong>or</strong></li>
+      <code>src/FR_math_2D.cpp</code>, <code>src/FR_math_2D.h</code>)
+      into the target project, <strong>or</strong></li>
   <li>Add FR_Math as a git submodule and point the build system at
       <code>src/</code>.</li>
 </ul>
@@ -46,7 +46,7 @@ <h2>Install</h2>
 
 <p><code>build.sh</code> wipes <code>build/</code>, rebuilds the
 library, examples, and tests, and runs the full test suite. On success
-the output shows 42 tests passing across six test binaries.</p>
+the output shows all tests passing (99% line coverage).</p>
 
 <h2>A first program</h2>
 
@@ -286,8 +286,10 @@ <h2>Running the test suite</h2>
 <pre><code class="language-bash">make test           # build + run every test suite
 make coverage       # coverage report (requires gcov)</code></pre>
 
-<p>As of v2.0.1, FR_Math ships with 42 passing tests and 99% line
-coverage across the library sources.</p>
+<p>Run <code>make test</code> for a full pass. With <code>make coverage</code>,
+line coverage of the library sources is about <strong>99%</strong>.
+See <a href="building.html">Building &amp; Testing</a> for targets,
+cross-compilation, and CI.</p>
 
 <h2>Next steps</h2>
 
@@ -297,7 +299,7 @@ <h2>Next steps</h2>
       conventions work.</li>
   <li><strong><a href="api-reference.html">API Reference</a></strong>
       &mdash; per-symbol inputs, outputs, precision, and saturation
-      behaviour.</li>
+      behavior.</li>
   <li><strong><a href="examples.html">Examples</a></strong> &mdash;
       runnable snippets for common tasks.</li>
   <li><strong><a href="building.html">Building &amp; Testing</a></strong>
diff --git a/pages/index.html b/pages/index.html
index ac84759..b449193 100644
--- a/pages/index.html
+++ b/pages/index.html
@@ -34,7 +34,7 @@ <h1>FR_Math</h1>
       Tested on gcc, clang, MSVC, IAR, Keil, sdcc, AVR-gcc, MSP430-gcc,
       RISC-V toolchains, and Arduino.</li>
   <li>Zero dependencies beyond <code>&lt;stdint.h&gt;</code>.</li>
-  <li>Parameterised radix: every function takes the binary point as an
+  <li>Parameterized radix: every function takes the binary point as an
       argument, so you choose how many fractional bits you need per
       call.</li>
   <li>Deterministic, bounded error — every public symbol has a
@@ -46,31 +46,35 @@ <h2>Measured accuracy</h2>
 
 <p>Errors below are measured at Q16.16 (s15.16). All functions accept any
 radix &mdash; Q16.16 is just the reference point for the table.
-See the <a href="https://github.com/deftio/fr_math/blob/master/build/test_tdd_report.md">TDD
-report</a> for sweeps at radixes 8, 12, 16, and 24.
-Percent errors skip expected values near zero (|expected| &lt; 0.01).</p>
+Run <code>make test-tdd</code> to generate the TDD report
+(<code>build/test_tdd_report.md</code>) with sweeps at radixes 8, 12, 16, and 24.</p>
 
 <!-- ACCURACY_TABLE_START -->
 <table>
-<thead><tr><th>Function</th><th>Max err (%)</th><th>Avg err (%)</th><th>Note</th></tr></thead>
+<thead><tr><th>Function</th><th>Max err (%)*</th><th>Avg err (%)</th><th>Note</th></tr></thead>
 <tbody>
-<tr><td>sin / cos</td><td>0.7169</td><td>0.0100</td><td>65536-pt sweep + specials</td></tr>
-<tr><td>tan</td><td>0.7118</td><td>0.0162</td><td>65536-pt sweep (skip poles)</td></tr>
-<tr><td>asin / acos</td><td>0.7025</td><td>0.0105</td><td>65536-pt; sqrt approx near boundary</td></tr>
-<tr><td>atan2</td><td>0.4953</td><td>0.0268</td><td>65536x5 radii; asin/acos+hypot_fast8</td></tr>
-<tr><td>atan</td><td>0.2985</td><td>0.0159</td><td>20001-pt sweep [-10,10]; via FR_atan2</td></tr>
-<tr><td>sqrt</td><td>0.0003</td><td>0.0000</td><td>Round-to-nearest</td></tr>
-<tr><td>log2</td><td>0.2479</td><td>0.0045</td><td>65-entry mantissa table</td></tr>
-<tr><td>pow2</td><td>0.1373</td><td>0.0057</td><td>65-entry fraction table</td></tr>
-<tr><td>ln, log10</td><td>0.0015</td><td>0.0004</td><td>Via FR_MULK28 from log2</td></tr>
-<tr><td>exp</td><td>0.0719</td><td>0.0051</td><td>FR_MULK28 + FR_pow2</td></tr>
-<tr><td>exp_fast</td><td>0.0719</td><td>0.0064</td><td>Shift-only scaling</td></tr>
-<tr><td>pow10</td><td>0.1163</td><td>0.0075</td><td>FR_MULK28 + FR_pow2</td></tr>
-<tr><td>pow10_fast</td><td>0.1163</td><td>0.0100</td><td>Shift-only scaling</td></tr>
-<tr><td>hypot (exact)</td><td>0.0001</td><td>0.0000</td><td>64-bit intermediate</td></tr>
-<tr><td>hypot_fast8 (8-seg)</td><td>0.0977</td><td>0.0508</td><td>Shift-only, no multiply</td></tr>
+<tr><td>sin/cos (BAM)</td><td>0.1526</td><td>0.0030</td><td>very fast binary angle trig</td></tr>
+<tr><td>sin/cos (deg)</td><td>0.1526</td><td>0.0029</td><td>degree input trig fns</td></tr>
+<tr><td>sin/cos (rad)</td><td>0.1828</td><td>0.0033</td><td>radian (traditional) trig</td></tr>
+<tr><td>tan (BAM)</td><td>0.5823</td><td>0.0008</td><td>binary angle tangent; ±maxint at poles</td></tr>
+<tr><td>tan (deg)</td><td>0.5311</td><td>0.0008</td><td>degree input tangent; saturated at poles</td></tr>
+<tr><td>tan (rad)</td><td>0.0386</td><td>0.0001</td><td>radian (traditional) tangent</td></tr>
+<tr><td>asin / acos</td><td>0.7771</td><td>0.0280</td><td>reverse trig, radian output</td></tr>
+<tr><td>atan2</td><td>0.2564</td><td>0.0237</td><td>reverse tangent, always safe</td></tr>
+<tr><td>atan</td><td>0.2425</td><td>0.0155</td><td>reverse tangent, accepts up to maxint</td></tr>
+<tr><td>sqrt</td><td>0.0000</td><td>0.0000</td><td>Round-to-nearest</td></tr>
+<tr><td>log2</td><td>0.0116</td><td>0.0016</td><td>shift/add only for speed</td></tr>
+<tr><td>pow2</td><td>0.0018</td><td>0.0004</td><td>shift/add only for speed</td></tr>
+<tr><td>ln, log10</td><td>0.0004</td><td>0.0000</td><td>shift/add only for speed</td></tr>
+<tr><td>exp</td><td>0.0003</td><td>0.0000</td><td>shift/add only for speed</td></tr>
+<tr><td>exp_fast</td><td>0.0009</td><td>0.0001</td><td>Shift-only scaling</td></tr>
+<tr><td>pow10</td><td>0.0005</td><td>0.0000</td><td>shift/add only for speed</td></tr>
+<tr><td>pow10_fast</td><td>0.0022</td><td>0.0002</td><td>Shift-only scaling</td></tr>
+<tr><td>hypot (exact)</td><td>0.0000</td><td>0.0000</td><td>Uses 64-bit intermediate</td></tr>
+<tr><td>hypot_fast8 (8-seg)</td><td>0.0915</td><td>0.0320</td><td>Shift-only, no multiply</td></tr>
 </tbody>
 </table>
+<p><em>*Relative error; reference clamped to 1% of full-scale output.</em></p>
 <!-- ACCURACY_TABLE_END -->
 
 <h2>What&rsquo;s in the box</h2>
@@ -80,8 +84,8 @@ <h2>What&rsquo;s in the box</h2>
 <tbody>
 <tr><td>Arithmetic</td><td><code>FR_ADD</code>, <code>FR_SUB</code>, <code>FR_DIV</code>, <code>FR_DIV32</code>, <code>FR_MOD</code>, <code>FR_FixMuls</code>, <code>FR_FixMulSat</code>, <code>FR_CHRDX</code></td></tr>
 <tr><td>Utility</td><td><code>FR_MIN</code>, <code>FR_MAX</code>, <code>FR_CLAMP</code>, <code>FR_ABS</code>, <code>FR_SGN</code></td></tr>
-<tr><td>Trig (integer deg)</td><td><code>FR_Sin</code>, <code>FR_Cos</code>, <code>FR_Tan</code>, <code>FR_SinI</code>, <code>FR_CosI</code>, <code>FR_TanI</code></td></tr>
-<tr><td>Trig (radian/BAM)</td><td><code>fr_sin</code>, <code>fr_cos</code>, <code>fr_tan</code>, <code>fr_sin_bam</code>, <code>fr_cos_bam</code>, <code>fr_sin_deg</code>, <code>fr_cos_deg</code></td></tr>
+<tr><td>Trig (integer deg)</td><td><code>fr_sin_deg</code>, <code>fr_cos_deg</code>, <code>fr_tan_deg</code>, <code>FR_SinI</code>, <code>FR_CosI</code>, <code>FR_TanI</code></td></tr>
+<tr><td>Trig (radian/BAM)</td><td><code>fr_sin</code>, <code>fr_cos</code>, <code>fr_tan</code>, <code>fr_sin_bam</code>, <code>fr_cos_bam</code>, <code>fr_tan_bam</code></td></tr>
 <tr><td>Inverse trig</td><td><code>FR_atan</code>, <code>FR_atan2</code>, <code>FR_asin</code>, <code>FR_acos</code></td></tr>
 <tr><td>Log / exp</td><td><code>FR_log2</code>, <code>FR_ln</code>, <code>FR_log10</code>, <code>FR_pow2</code>, <code>FR_EXP</code>, <code>FR_POW10</code>, <code>FR_EXP_FAST</code>, <code>FR_POW10_FAST</code>, <code>FR_MULK28</code></td></tr>
 <tr><td>Roots</td><td><code>FR_sqrt</code>, <code>FR_hypot</code>, <code>FR_hypot_fast8</code></td></tr>
@@ -98,21 +102,24 @@ <h2>What&rsquo;s in the box</h2>
 
 <h2>Lean build options</h2>
 
-<p>Two compile-time <code>#define</code> guards let you strip optional subsystems
+<p>Compile-time <code>#define</code> guards let you strip optional subsystems
 for ROM-constrained targets. Define them before including
 <code>FR_math.h</code> (or pass <code>-D</code> on the compiler command line):</p>
 
 <table>
 <thead><tr><th>Define</th><th>What it removes</th><th>Typical savings</th></tr></thead>
 <tbody>
+<tr><td><code>FR_LEAN</code></td><td>Degree trig, BAM tan, angle converters, <code>FR_log10</code>, <code>FR_hypot</code>, waves + ADSR</td><td>~3.7 KB</td></tr>
 <tr><td><code>FR_NO_PRINT</code></td><td><code>FR_printNumF</code>, <code>FR_printNumD</code>, <code>FR_printNumH</code>, <code>FR_numstr</code></td><td>~1.3 KB</td></tr>
 <tr><td><code>FR_NO_WAVES</code></td><td><code>fr_wave_*</code> (6 shapes), <code>fr_adsr_*</code> (ADSR envelope), <code>FR_HZ2BAM_INC</code></td><td>~0.6 KB</td></tr>
 </tbody>
 </table>
 
-<p>With both guards enabled the core math library (trig, inverse trig, log/exp,
-sqrt, hypot) compiles to ~3.5 KB on x86-64 / clang -Os. On Thumb-2 this
-would be roughly 2.6 KB.</p>
+<p><code>FR_LEAN</code> keeps only radian trig (sin, cos, tan), inverse trig,
+sqrt, log2, ln, exp, pow2, and arithmetic &mdash; comparable to libfixmath&rsquo;s
+API but at 4.7 KB text vs libfixmath&rsquo;s 4.9 KB + 112 KB BSS.
+With <code>FR_LEAN</code> + <code>FR_NO_PRINT</code> the library compiles to
+~4.7 KB on x86-64 / clang -Os.</p>
 
 <pre><code class="language-c">/* Example: headless sensor node &mdash; math only, no print, no audio */
 #define FR_NO_PRINT
@@ -124,18 +131,18 @@ <h2>Lean build options</h2>
 are most useful when you include the library as a single <code>.c</code> file
 or static archive without section-level dead-code elimination.</p>
 
-<h2>Why fixed-point, in 2026?</h2>
+<h2>Why fixed-point?</h2>
 
-<p>Most application code today has an FPU and can use <code>float</code>
-freely. But there are still large, interesting corners where
-fixed-point pays off:</p>
+<p>Many modern microcontrollers have an FPU and can use <code>float</code>
+freely, but older and low-cost MCUs without one remain common. Fixed-point is
+often faster and more deterministic than <code>float</code>, and it excels in situations like:</p>
 
 <ul>
-  <li><strong>8- and 16-bit MCUs</strong> (AVR, MSP430, 8051, sdcc) where the
+  <li><strong>8- and 16-bit MCUs</strong> (AVR, MSP430, 8051 via SDCC) where the
       FPU does not exist and even software float is too slow or too
       large.</li>
   <li><strong>Hot inner loops on any CPU</strong> where a
-      parameterised-radix integer multiply is faster and more
+      parameterized-radix integer multiply is faster and more
       deterministic than a <code>float</code>. Think DSP taps, PID
       loops, coordinate transforms inside a scanline renderer.</li>
   <li><strong>Bit-exact reproducibility</strong> across compilers,
@@ -179,20 +186,25 @@ <h2>Quick taste</h2>
  * conversions and simple arithmetic:
  *   I2FR, FR2I, FR_NUM, FR_ADD, FR_DIV, FR_ABS, FR_CHRDX, FR_EXP ...
  *
- * MixedCase FR_ names are functions &mdash; they contain loops, tables, or
- * multi-step algorithms where inlining would waste ROM:
- *   FR_Cos, FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ...
+ * The MixedCase trig names (FR_Cos, FR_Sin, FR_Tan) are legacy aliases
+ * &mdash; they still work but map to the current lowercase names:
+ *   FR_Cos &rarr; fr_cos_deg, FR_Sin &rarr; fr_sin_deg, FR_Tan &rarr; fr_tan_deg
  *
- * lowercase fr_ names are v2 functions (radian trig, wave generators,
- * ADSR envelopes):
- *   fr_sin, fr_cos, fr_tan, fr_wave_tri, fr_adsr_step ...
+ * lowercase fr_ names are the current API (degree wrappers, radian
+ * trig, BAM trig, wave generators, ADSR envelopes):
+ *   fr_cos_deg, fr_sin_deg, fr_tan_deg, fr_sin, fr_cos, fr_tan,
+ *   fr_wave_tri, fr_adsr_step ...
+ *
+ * Other MixedCase / lowercase FR_ names are functions with loops,
+ * tables, or multi-step algorithms:
+ *   FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ...
  *
  * Some macros wrap functions: FR_EXP(x,r) scales x then calls
  * FR_pow2 &mdash; one-liner convenience, heavy lifting in the function.
  */
 
 /* ---- Math functions ---- */
-s32 c45   = FR_Cos(45, 0);                /* cos(45&deg;) = 0.7071       */
+s32 c45   = fr_cos_deg(45, 0);             /* cos(45&deg;) = 0.7071       */
 s32 s30   = fr_sin(FR_numstr("0.5236", R), R); /* sin(0.5236 rad)    */
 s32 root2 = FR_sqrt(two, R);              /* sqrt(2)  = 1.4142       */
 s32 angle = FR_atan2(I2FR(1,R), I2FR(1,R), R); /* atan2(1,1) rad     */
@@ -228,7 +240,7 @@ <h2>Comparison</h2>
 <tr><td>Multiply-free option</td><td>No</td><td>No</td><td>Yes (e.g. <code>FR_EXP_FAST</code>, <code>FR_hypot_fast8</code>)</td></tr>
 <tr><td>Wave generators</td><td>No</td><td>No</td><td>6 shapes + ADSR</td></tr>
 <tr><td>Dependencies</td><td>None</td><td>ARM only</td><td>None</td></tr>
-<tr><td>Code size (Cortex-M0, -Os)</td><td>2.4 KB</td><td>~40 KB+</td><td>4.2 KB</td></tr>
+<tr><td>Code size (Cortex-M0, -Os)</td><td>2.4 KB</td><td>~40 KB+</td><td>3.4 KB lean / 5.7 KB full</td></tr>
 </tbody>
 </table>
 
@@ -237,7 +249,7 @@ <h2>Comparison</h2>
 FR_Math includes log/ln/log10, wave generators, ADSR, print helpers,
 and variable radix. CMSIS-DSP estimate is for the math function subset
 only. See
-<a href="https://github.com/deftio/fr_math/blob/master/docker/build_sizes.sh">docker/build_sizes.sh</a>
+<a href="https://github.com/deftio/fr_math/blob/master/scripts/crossbuild_sizes.sh">scripts/crossbuild_sizes.sh</a>
 for the build script.</small></p>
 
 <h2>History</h2>
@@ -246,7 +258,7 @@ <h2>History</h2>
 built for graphics transforms on 16&nbsp;MHz 68k Palm Pilots (it
 shipped inside Trumpetsoft&rsquo;s <em>Inkstorm</em>), then ported
 forward to ARM, x86, MIPS, RISC-V, and various 8/16-bit embedded
-targets. v2.0.7 is the current release with a full test suite,
+targets. The current release has a full test suite,
 bit-exact numerical specification, and CI on every push.</p>
 
 <h2>License</h2>
diff --git a/pages/releases.html b/pages/releases.html
index 337035a..96e18bb 100644
--- a/pages/releases.html
+++ b/pages/releases.html
@@ -5,7 +5,7 @@
   <meta http-equiv="X-UA-Compatible" content="IE=edge">
   <meta name="viewport" content="width=device-width, initial-scale=1">
   <title>Releases — FR_Math</title>
-  <meta name="description" content="FR_Math release history: v2.0.7 README restructure and FR_CORE_ONLY, v2.0.0 precision rewrite and new waveforms, v1.0.3 test coverage, v1.02 initial public release.">
+  <meta name="description" content="FR_Math release history: v2.0.8 tangent rewrite and trig rounding, v2.0.7 FR_CORE_ONLY, v2.0.0 precision rewrite and new waveforms, v1.0.3 test coverage, v1.02 initial public release.">
   <link rel="stylesheet" href="assets/main.css">
 </head>
 <body>
@@ -21,6 +21,19 @@ <h1>Releases</h1>
 <a href="https://github.com/deftio/fr_math/blob/master/release_notes.md">release_notes.md</a>
 in the repo.</p>
 
+<h2>v2.0.8 &mdash; 2026</h2>
+
+<p>Tangent accuracy rewrite and trig rounding fix.</p>
+
+<ul>
+  <li><strong>BAM-native tangent</strong>: new <code>fr_tan_bam(u16 bam)</code> with 65-entry octant table (130 bytes). No 64-bit math. <code>FR_TanI</code>, <code>FR_Tan</code>, <code>fr_tan</code> are now thin wrappers.</li>
+  <li><strong>Round-to-nearest fix</strong>: radian/degree trig wrappers now round instead of truncating when converting to BAM. Peak error drops from ~1.03% to 0.16% on the radian path, matching BAM-native accuracy.</li>
+  <li><strong>Conversion macro trimming</strong>: <code>FR_DEG2BAM</code> and <code>FR_RAD2BAM</code> reduced to ~18&ndash;21 bits (from ~28 bits). Verified: no measurable accuracy impact.</li>
+  <li><strong><code>FR_TRIG_MINVAL</code> fixed</strong>: now <code>-FR_TRIG_MAXVAL</code> (was <code>-FR_TRIG_MASK</code>).</li>
+</ul>
+
+<hr>
+
 <h2>v2.0.7 &mdash; 2026</h2>
 
 <p>README restructure, accuracy table cleanup, expanded cross-compile support.</p>
@@ -28,10 +41,10 @@ <h2>v2.0.7 &mdash; 2026</h2>
 <ul>
   <li><strong><code>FR_CORE_ONLY</code> convenience define</strong> &mdash; single <code>#define</code> strips both print helpers and wave generators</li>
   <li><strong>Accuracy table cleanup</strong> &mdash; removed LSB column (percent error is the user-facing metric)</li>
-  <li><strong>New cross-compile targets</strong> &mdash; RP2040 (Cortex-M0+), STM32 (Cortex-M4), 68HC11 added to Docker build</li>
-  <li><strong>Two-column size table</strong> &mdash; Core (<code>-DFR_CORE_ONLY</code>) vs Full for every target</li>
-  <li><strong><code>scripts/update_sizes.sh</code></strong> &mdash; auto-patches size tables from <code>build/sizes.csv</code></li>
-  <li>README reordered: accuracy table first, then function list, then size table</li>
+  <li><strong>New cross-compile targets</strong> &mdash; RP2040 (Cortex-M0+), STM32 (Cortex-M4), 68HC11, MIPS32 added to Docker build</li>
+  <li><strong>Three-column size table</strong> &mdash; Lean / Core / Full for every target, sorted 8-bit &rarr; 64-bit</li>
+  <li><strong><code>scripts/crossbuild_sizes.sh</code></strong> &mdash; consolidated script: Docker build, CSV + markdown output, doc patching</li>
+  <li>README reordered and cleaned up: accuracy table first, badges as standard markdown, concise build flavor descriptions</li>
 </ul>
 
 <hr>
@@ -159,7 +172,7 @@ <h3>New utility macros</h3>
   <li><code>FR_DIV(x, xr, y, yr)</code> &mdash; fixed-point division with
       64-bit pre-scaling. Now <strong>rounds to nearest</strong>
       (&le; 0.5 LSB error) instead of truncating.
-      <code>FR_DIV_TRUNC</code> preserves the old truncating behaviour
+      <code>FR_DIV_TRUNC</code> preserves the old truncating behavior
       for backward compatibility. <code>FR_DIV32</code> is the 32-bit-only
       truncating path.</li>
   <li><code>FR_MOD(x, xr, y, yr)</code> &mdash; fixed-point modulus.</li>
@@ -190,7 +203,7 @@ <h3>Breaking changes from v2.0.0</h3>
 <tr><td>FR_atan signature</td><td><code>(input, radix)</code> &rarr; s16 degrees</td><td><code>(input, radix, out_radix)</code> &rarr; s32 radians</td></tr>
 <tr><td>FR_atan2 signature</td><td><code>(y, x)</code> &rarr; s16 degrees</td><td><code>(y, x, out_radix)</code> &rarr; s32 radians</td></tr>
 <tr><td>FR_BAM2RAD</td><td>off by 1024&times; (bug)</td><td>correct</td></tr>
-<tr><td>FR_DIV rounding</td><td>truncates toward zero</td><td>rounds to nearest (use <code>FR_DIV_TRUNC</code> for old behaviour)</td></tr>
+<tr><td>FR_DIV rounding</td><td>truncates toward zero</td><td>rounds to nearest (use <code>FR_DIV_TRUNC</code> for old behavior)</td></tr>
 </tbody>
 </table>
 
@@ -231,7 +244,7 @@ <h3>Numerical fixes</h3>
       dropped.</li>
   <li><code>FR_atan</code>, <code>FR_Tan</code>, <code>FR_TanI</code>:
       wiring and overflow fixes.</li>
-  <li><code>FR_printNumD/F/H</code>: fixed undefined behaviour on
+  <li><code>FR_printNumD/F/H</code>: fixed undefined behavior on
       <code>INT_MIN</code> and a broken fraction extraction in the
       v1 code.</li>
   <li><code>FR_DEG2RAD</code> / <code>FR_RAD2DEG</code>: macro bodies
@@ -251,7 +264,7 @@ <h3>New functionality</h3>
       <code>FR_BAM2DEG</code>, <code>FR_RAD2BAM</code>,
       <code>FR_BAM2RAD</code>. BAM (16 bits per full circle) is the
       natural integer representation for phase accumulators and
-      gives zero quantisation at the wraparound.</li>
+      gives zero quantization at the wraparound.</li>
   <li><strong>Square root and hypot</strong>: <code>FR_sqrt</code>
       uses a digit-by-digit integer isqrt on <code>int64_t</code>;
       <code>FR_hypot</code> computes <code>sqrt(x&sup2; + y&sup2;)</code>
@@ -312,10 +325,9 @@ <h3>Breaking changes</h3>
 
 <h3>Test suite</h3>
 
-<p>v2 ships with <strong>42 tests</strong> across six test binaries
-and a characterisation suite (<code>test_tdd.cpp</code>) that pins
-numerical behaviour to bit-exact reference values. Overall line
-coverage is <strong>99%</strong> on the library sources.</p>
+<p>v2 ships with a full test suite covering <strong>99%</strong> of library
+source lines, plus a characterization suite (<code>test_tdd.cpp</code>)
+that pins numerical behavior to bit-exact reference values.</p>
 
 <h2>v1.0.3 &mdash; 2025</h2>
 
@@ -351,7 +363,7 @@ <h2>Timeline</h2>
 when it was written to run 2D graphics transforms on 16&nbsp;MHz 68k
 Palm Pilots for Trumpetsoft&rsquo;s <em>Inkstorm</em>. It has since
 been ported to ARM, x86, MIPS, RISC-V, and a menagerie of 8- and
-16-bit embedded targets. v2.0.7 is the current release with a
+16-bit embedded targets. The current release has a
 full test suite, a bit-exact numerical specification, and CI on
 every push.</p>
 
diff --git a/pages/version.json b/pages/version.json
new file mode 100644
index 0000000..f81a375
--- /dev/null
+++ b/pages/version.json
@@ -0,0 +1 @@
+{"version":"2.0.8","hex":"0x020008"}
diff --git a/release_management.md b/release_management.md
index 3353e3a..214f544 100644
--- a/release_management.md
+++ b/release_management.md
@@ -20,7 +20,7 @@ All version-bearing files are kept in sync via
 | `./scripts/build.sh` | Clean rebuild + run tests (one-shot) |
 | `./scripts/clean_build.sh` | Wipe `build/` and `coverage/`, recreate them |
 | `./scripts/coverage_report.sh` (or `make coverage`) | gcov coverage table |
-| `./scripts/size_report.sh` (or `make size-report`) | Multi-arch object-size report |
+| `./scripts/crossbuild_sizes.sh` (or `make size-report`) | Multi-arch object-size report (Docker) |
 | `./scripts/sync_version.sh` | Propagate `FR_MATH_VERSION_HEX` to every versioned file |
 | `./scripts/sync_version.sh --check` | Drift check (non-destructive) |
 | `./tools/make_release.sh` | Guided release pipeline (validate → PR → merge → tag → publish) |
@@ -77,7 +77,7 @@ start, so do not run it inside a session that depends on pre-existing
 Invoked automatically by `make coverage` and by
 `tools/make_release.sh`.
 
-### `scripts/size_report.sh` — multi-architecture size report
+### `scripts/crossbuild_sizes.sh` — multi-architecture size report
 
 Compiles `src/FR_math.c` against every cross-toolchain it can find and
 prints a formatted table of object sizes. Architectures attempted:
@@ -242,7 +242,7 @@ invoked individually.
 
 | Target | Effect |
 | --- | --- |
-| `make size-report` | Delegates to `scripts/size_report.sh` (multi-arch table) |
+| `make size-report` | Delegates to `scripts/crossbuild_sizes.sh` (multi-arch table, Docker) |
 | `make size-simple` | `size` (or `ls -lh`) on `build/*.o` for the current platform only |
 
 ### Clean
@@ -290,7 +290,7 @@ loop is:
 ```bash
 ./scripts/build.sh           # clean rebuild + tests
 ./scripts/coverage_report.sh # coverage after a change
-./scripts/size_report.sh     # size after a change
+./scripts/crossbuild_sizes.sh # size after a change
 ```
 
 ---
diff --git a/release_notes.md b/release_notes.md
index 8a5f3bb..5ff9659 100644
--- a/release_notes.md
+++ b/release_notes.md
@@ -1,5 +1,51 @@
 # FR_Math Release Notes
 
+## Version 2.0.8 (2026)
+
+Tangent accuracy rewrite and trig rounding fix.
+
+### BAM-native tangent table
+
+- **New `fr_tan_bam(u16 bam)`** function with a dedicated 65-entry octant
+  lookup table (`gFR_TAN_TAB_O` in `FR_trig_table.h`, 130 bytes ROM).
+  First octant uses direct table + lerp; second octant uses the
+  reciprocal identity `tan(x) = 1/tan(90-x)` with one 32-bit division.
+  No 64-bit intermediates anywhere in the tan path.
+- **`FR_TanI`, `FR_Tan`, `fr_tan`** are now thin wrappers over
+  `fr_tan_bam`. The old sin/cos division implementation is removed.
+- Peak error: 0.17% (BAM), 0.60% (deg r7), 0.17% (rad r16).
+
+### Round-to-nearest fix for radian/degree wrappers
+
+- `fr_cos`, `fr_sin`, `fr_tan`, `FR_Cos`, `FR_Sin`, `FR_Tan` now add
+  0.5 LSB (`1 << (radix-1)`) before the `>> radix` shift when converting
+  from radians/degrees to BAM. This rounds to the nearest BAM value
+  instead of truncating, eliminating a systematic 1-BAM rounding error
+  that caused ~1% peak error near zero crossings.
+- Radian-path sin/cos/tan now match BAM-native accuracy (0.16-0.17%
+  peak, was ~1.03%).
+
+### Conversion macro trimming
+
+- `FR_DEG2BAM`: 10 terms (~28 bits) reduced to 7 terms (~18 bits)
+- `FR_RAD2BAM`: 9 terms (~27 bits) reduced to 7 terms (~21 bits)
+- `FR_DEG2RAD`: 3 terms (~13 bits) extended to 5 terms (~17 bits)
+- 18 bits of precision gives 4 bits of headroom over the 14-bit
+  effective BAM resolution of the trig tables. Verified: reverting to
+  the old full-precision macros changes sin/cos peak error by <0.04%.
+
+### Other
+
+- `FR_TRIG_MINVAL` fixed: was `-FR_TRIG_MASK` (-65535), now
+  `-FR_TRIG_MAXVAL` (-2147483647) to properly pair with `FR_TRIG_MAXVAL`
+  for tan saturation clamping.
+- Accuracy table in all docs now shows separate BAM/deg/rad rows for
+  sin/cos and tan, matching the TDD characterization report.
+- `fr_tan_bam` added to function listings across README, docs, HTML
+  pages, and llms.txt.
+
+---
+
 ## Version 2.0.7 (2026)
 
 README restructure, accuracy table cleanup, and expanded cross-compile support.
@@ -20,16 +66,18 @@ that varies with the chosen radix.
 
 - **RP2040 (Cortex-M0+)** and **STM32 (Cortex-M4)** added as named targets
   in the Docker cross-build
-- **68HC11** toolchain added to the Docker image
-- Size table now shows two columns: **Core** (`-DFR_CORE_ONLY`) and **Full**
-- `docker/build_sizes.sh` outputs `build/sizes.csv` for automated patching
-- New `scripts/update_sizes.sh` auto-patches size tables into README, docs,
-  and HTML pages
+- **68HC11** and **MIPS32** toolchains added to the Docker image
+- Size table now shows three columns: **Lean**, **Core**, and **Full**
+- Consolidated `scripts/crossbuild_sizes.sh` — single script runs Docker,
+  builds all targets, writes CSV + markdown, and patches doc files
+  (replaces `crossbuild-docker.sh`, `size_report.sh`, `update_sizes.sh`)
+- Size table sorted by architecture width (8-bit → 64-bit)
 
 ### README restructure
 
 Sections reordered: accuracy table moved above the size table to lead with
-the library's primary selling point. Size table now shows Core vs Full columns.
+the library's primary selling point. Badges cleaned up from Quikdown HTML to
+standard markdown syntax. Build flavor descriptions made more concise.
 
 ---
 
diff --git a/scripts/accuracy_report.sh b/scripts/accuracy_report.sh
index 1bd5745..f996ac1 100755
--- a/scripts/accuracy_report.sh
+++ b/scripts/accuracy_report.sh
@@ -86,12 +86,13 @@ patch_markdown() {
         return
     fi
 
-    # Build replacement block: sentinel + header + separator + data + sentinel
+    # Build replacement block: sentinel + header + separator + data + footnote + sentinel
     local replacement
     replacement="<!-- ACCURACY_TABLE_START -->"$'\n'
-    replacement+="| Function | Max err (%) | Avg err (%) | Note |"$'\n'
+    replacement+="| Function | Max err (%)*| Avg err (%) | Note |"$'\n'
     replacement+="|---|---:|---:|---|"$'\n'
     replacement+="$DATA_ROWS"$'\n'
+    replacement+=$'\n'"*Relative error; reference clamped to 1% of full-scale output."$'\n'
     replacement+="<!-- ACCURACY_TABLE_END -->"
 
     # Use perl to replace between sentinels
@@ -137,11 +138,12 @@ patch_html() {
     local replacement
     replacement="<!-- ACCURACY_TABLE_START -->"$'\n'
     replacement+="<table>"$'\n'
-    replacement+="<thead><tr><th>Function</th><th>Max err (%)</th><th>Avg err (%)</th><th>Note</th></tr></thead>"$'\n'
+    replacement+="<thead><tr><th>Function</th><th>Max err (%)*</th><th>Avg err (%)</th><th>Note</th></tr></thead>"$'\n'
     replacement+="<tbody>"$'\n'
     replacement+="$html_rows"$'\n'
     replacement+="</tbody>"$'\n'
     replacement+="</table>"$'\n'
+    replacement+="<p><em>*Relative error; reference clamped to 1% of full-scale output.</em></p>"$'\n'
     replacement+="<!-- ACCURACY_TABLE_END -->"
 
     perl -0777 -i -pe "
diff --git a/scripts/build.sh b/scripts/build.sh
index ade09a4..129c18a 100755
--- a/scripts/build.sh
+++ b/scripts/build.sh
@@ -75,7 +75,7 @@ echo -e "${GREEN}  ok${NC}"
 
 # Print host-compiled library sizes so the developer can see how the
 # objects came out without having to dig in build/. This is host-only;
-# for a multi-arch comparison run scripts/size_report.sh.
+# for a multi-arch comparison run scripts/crossbuild_sizes.sh.
 print_host_size() {
     local host_arch
     host_arch="$(uname -m 2>/dev/null || echo unknown)"
@@ -129,6 +129,6 @@ echo -e "${GREEN}=========================================${NC}"
 echo ""
 echo "Next steps:"
 echo "  - ./scripts/coverage_report.sh    (coverage analysis)"
-echo "  - ./scripts/size_report.sh        (object file sizes)"
+echo "  - ./scripts/crossbuild_sizes.sh   (object file sizes)"
 echo "  - ./tools/make_release.sh         (guided release pipeline)"
 echo ""
diff --git a/scripts/crossbuild-docker.sh b/scripts/crossbuild-docker.sh
deleted file mode 100755
index 7f10d6d..0000000
--- a/scripts/crossbuild-docker.sh
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/bin/bash
-# crossbuild-docker.sh -- cross-compile FR_math inside Docker container
-# Runs inside the xelp-crossbuild Docker image.
-# Reports object file and .text section sizes for each target.
-
-set -e
-
-SRC=/fr_math/src/FR_math.c
-INCLUDE="-I/fr_math/src"
-OBJ=/tmp/FR_math.o
-
-SEP="============================================================"
-
-# Accumulate summary rows: "label|text_size"
-SUMMARY=""
-
-print_sizes() {
-    local label="$1"
-    echo ""
-    echo "$SEP"
-    echo "$label"
-    echo "$SEP"
-    if [ ! -f "$OBJ" ]; then
-        echo "  (build failed)"
-        SUMMARY="${SUMMARY}${label}|FAIL\n"
-        return
-    fi
-    OBJ_SIZE=$(stat -c%s "$OBJ" 2>/dev/null || wc -c < "$OBJ")
-    TEXT_SIZE=$(size "$OBJ" 2>/dev/null | awk 'FNR==2{print $1}')
-    printf "  obj file size: %6s bytes\n" "$OBJ_SIZE"
-    printf "  .text section: %6s bytes\n" "$TEXT_SIZE"
-    SUMMARY="${SUMMARY}${label}|${TEXT_SIZE}\n"
-    rm -f "$OBJ"
-}
-
-echo ""
-echo "FR_Math cross-compilation size report"
-echo "Date: $(date -u '+%Y-%m-%d %H:%M UTC')"
-echo ""
-
-# --- x86 ---
-gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true
-print_sizes "GCC x86-64"
-
-clang -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true
-print_sizes "Clang x86-64"
-
-gcc -c $SRC $INCLUDE -Os -m32 -Wall -o $OBJ 2>&1 && true
-print_sizes "GCC x86-32"
-
-tcc -c $SRC $INCLUDE -o $OBJ 2>&1 && true
-print_sizes "TCC x86"
-
-# --- ARM ---
-aarch64-linux-gnu-gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true
-print_sizes "GCC AArch64 (ARM64)"
-
-arm-none-eabi-gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true
-print_sizes "GCC ARM32"
-
-arm-none-eabi-gcc -c $SRC $INCLUDE -Os -mthumb -Wall -o $OBJ 2>&1 && true
-print_sizes "GCC ARM32 Thumb"
-
-# --- MSP430 ---
-# Bare-metal: no stdint.h in sysroot — use fallback typedefs
-NOSTD="-DFR_NO_STDINT"
-
-msp430-gcc -c $SRC $INCLUDE $NOSTD -Os -Wall -o $OBJ 2>&1 && true
-print_sizes "GCC MSP430"
-
-# --- AVR ---
-avr-gcc -c $SRC $INCLUDE $NOSTD -Os -mmcu=avr5 -Wall -o $OBJ 2>&1 && true
-print_sizes "GCC AVR5 (ATmega328P)"
-
-avr-gcc -c $SRC $INCLUDE $NOSTD -Os -mmcu=attiny85 -Wall -o $OBJ 2>&1 && true
-print_sizes "GCC AVR ATtiny85"
-
-# --- 68HC11 ---
-m68hc11-gcc -c $SRC $INCLUDE $NOSTD -Os -o $OBJ 2>&1 && true
-print_sizes "GCC 68HC11"
-
-# --- 68k (Motorola 68000) ---
-m68k-linux-gnu-gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true
-print_sizes "GCC m68k"
-
-# --- PowerPC ---
-powerpc-linux-gnu-gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true
-print_sizes "GCC PowerPC"
-
-# --- RISC-V ---
-riscv64-linux-gnu-gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true
-print_sizes "GCC RISC-V (rv64)"
-
-riscv64-unknown-elf-gcc -c $SRC $INCLUDE $NOSTD -Os -march=rv32imac -mabi=ilp32 -Wall -o $OBJ 2>&1 && true
-print_sizes "GCC RISC-V (rv32)"
-
-# --- Xtensa (ESP8266/ESP32 family) ---
-xtensa-lx106-elf-gcc -c $SRC $INCLUDE $NOSTD -Os -Wall -o $OBJ 2>&1 && true
-print_sizes "GCC Xtensa LX106 (ESP8266)"
-
-# --- Function size table (native GCC) ---
-echo ""
-echo "$SEP"
-echo "Function size table (GCC x86-64)"
-echo "$SEP"
-gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1
-nm $OBJ -n -S --size-sort -f sysv -t d 2>/dev/null | grep -E "FUNC" || true
-rm -f $OBJ
-
-# --- Summary table ---
-echo ""
-echo "$SEP"
-echo "Summary: FR_math.c code size (bytes), compiled with -Os"
-echo "$SEP"
-printf "  %-28s %s\n" "Target" ".text (bytes)"
-printf "  %-28s %s\n" "----------------------------" "-------------"
-echo -e "$SUMMARY" | while IFS='|' read -r label size; do
-    [ -z "$label" ] && continue
-    printf "  %-28s %s\n" "$label" "$size"
-done
-
-echo ""
-echo "Done."
diff --git a/scripts/crossbuild_sizes.sh b/scripts/crossbuild_sizes.sh
new file mode 100755
index 0000000..d6e1f85
--- /dev/null
+++ b/scripts/crossbuild_sizes.sh
@@ -0,0 +1,286 @@
+#!/usr/bin/env bash
+#
+# crossbuild_sizes.sh — cross-compile FR_math inside Docker, generate size
+# tables, and optionally patch doc files.
+#
+# Usage:
+#   scripts/crossbuild_sizes.sh            # build, print table, write CSV + MD
+#   scripts/crossbuild_sizes.sh --update   # also patch doc files
+#
+# Requires: docker, xelp-crossbuild:latest image
+#
+# Output files:
+#   build/sizes.csv  — raw CSV (target,width,lean,core,full)
+#   build/sizes.md   — markdown table
+#
+# With --update, patches these files between <!-- SIZE_TABLE_START/END --> sentinels:
+#   README.md              — markdown table
+#   docs/building.md       — markdown table
+#   pages/guide/building.html — HTML <table>
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+cd "${PROJECT_ROOT}"
+
+MODE="print"
+for arg in "$@"; do
+    case "$arg" in
+        --update) MODE="update" ;;
+        -h|--help)
+            echo "Usage: scripts/crossbuild_sizes.sh [--update]"
+            echo "  (no args)   Build in Docker, print size table, write CSV + MD"
+            echo "  --update    Also patch README.md, docs/building.md, pages/guide/building.html"
+            exit 0
+            ;;
+        *) echo "Unknown option: $arg" >&2; exit 1 ;;
+    esac
+done
+
+# -----------------------------------------------------------------------
+# 1. Preflight checks
+# -----------------------------------------------------------------------
+
+if ! command -v docker >/dev/null 2>&1; then
+    echo "ERROR: docker not found. Install Docker first." >&2
+    exit 1
+fi
+
+if ! docker image inspect xelp-crossbuild:latest >/dev/null 2>&1; then
+    echo "ERROR: Docker image 'xelp-crossbuild:latest' not found." >&2
+    echo "Build it with: docker build -t xelp-crossbuild:latest scripts/" >&2
+    exit 1
+fi
+
+mkdir -p build
+
+# -----------------------------------------------------------------------
+# 2. Run cross-compilation inside Docker
+# -----------------------------------------------------------------------
+
+echo "Running cross-compilation in Docker..."
+
+docker run --rm -v "${PROJECT_ROOT}:/fr_math" xelp-crossbuild:latest \
+    /bin/bash -c '
+set -e
+
+SRC=/fr_math/src/FR_math.c
+INCLUDE="-I/fr_math/src"
+OBJ=/tmp/FR_math.o
+CSV=/fr_math/build/sizes.csv
+
+LEAN_DEFS="-DFR_LEAN -DFR_NO_PRINT"
+CORE_DEFS="-DFR_CORE_ONLY"
+FULL_DEFS=""
+
+build_text_size() {
+    local compiler="$1" flags="$2" defs="$3" text=""
+    rm -f "$OBJ"
+    # $flags and $defs stay unquoted on purpose: each may hold several words (or none).
+    if $compiler -c $SRC $INCLUDE $flags $defs -Os -Wall -o $OBJ 2>/dev/null; then
+        # Row 2, column 1 of the size output is the .text byte count.
+        text=$(size "$OBJ" 2>/dev/null | awk "FNR==2{print \$1}")
+    fi
+    # Failed compile or unreadable object: report FAIL, never an empty CSV field.
+    echo "${text:-FAIL}"
+    rm -f "$OBJ"
+}
+
+# Append one CSV row for a target: label, width, then Lean/Core/Full sizes.
+build_target() {
+    local name="$1" bits="$2" cc="$3" cc_flags="$4"
+    local row="${name},${bits}" defs
+
+    # Same compiler and flags, three define sets, in CSV column order.
+    for defs in "$LEAN_DEFS" "$CORE_DEFS" "$FULL_DEFS"; do
+        row+=",$(build_text_size "$cc" "$cc_flags" "$defs")"
+    done
+    echo "$row" >> "$CSV"
+}
+
+# Write CSV header
+echo "target,width,lean,core,full" > "$CSV"
+
+# --- 8-bit ---
+NOSTD="-DFR_NO_STDINT"
+build_target "AVR ATmega328P"            8  "avr-gcc"            "$NOSTD -mmcu=avr5"
+build_target "AVR ATtiny85"              8  "avr-gcc"            "$NOSTD -mmcu=attiny85"
+build_target "68HC11"                    8  "m68hc11-gcc"        "$NOSTD"
+
+# --- 16-bit ---
+build_target "MSP430"                   16  "msp430-gcc"         "$NOSTD"
+
+# --- 32-bit ---
+build_target "Cortex-M0 (RP2040)"       32  "arm-none-eabi-gcc"  "-mcpu=cortex-m0 -mthumb"
+build_target "Cortex-M4 (STM32)"        32  "arm-none-eabi-gcc"  "-mcpu=cortex-m4 -mthumb"
+build_target "ARM32"                    32  "arm-none-eabi-gcc"  ""
+build_target "ARM Thumb"                32  "arm-none-eabi-gcc"  "-mthumb"
+build_target "RISC-V rv32"              32  "riscv64-unknown-elf-gcc" "$NOSTD -march=rv32imac -mabi=ilp32"
+build_target "Xtensa LX106 (ESP8266)"   32  "xtensa-lx106-elf-gcc" "$NOSTD"
+build_target "Xtensa LX7 (ESP32-S3)"   32  "xtensa-esp-elf-gcc" ""
+build_target "m68k"                     32  "m68k-linux-gnu-gcc" ""
+build_target "PowerPC"                  32  "powerpc-linux-gnu-gcc" ""
+build_target "MIPS32"                   32  "mipsel-linux-gnu-gcc" ""
+build_target "x86-32"                   32  "gcc"                "-m32"
+build_target "TCC x86"                  32  "tcc"                ""
+
+# --- 64-bit ---
+build_target "RISC-V rv64"              64  "riscv64-linux-gnu-gcc" ""
+build_target "x86-64 (GCC)"             64  "gcc"                ""
+build_target "x86-64 (Clang)"           64  "clang"              ""
+build_target "AArch64 (ARM64)"          64  "aarch64-linux-gnu-gcc" ""
+
+echo "Docker build complete — $(tail -n +2 "$CSV" | grep -c ,) rows written to build/sizes.csv"
+'
+
+# -----------------------------------------------------------------------
+# 3. Generate tables on host
+# -----------------------------------------------------------------------
+
+CSV="build/sizes.csv"
+
+if [ ! -f "${CSV}" ]; then
+    echo "ERROR: ${CSV} not found after Docker run." >&2
+    exit 1
+fi
+
+# Sort by width ascending, then full size ascending (skip header)
+SORTED=$(tail -n +2 "${CSV}" | sort -t',' -k2,2n -k5,5n)
+
+if [ -z "${SORTED}" ]; then
+    echo "ERROR: No data rows in ${CSV}" >&2
+    exit 1
+fi
+
+# Format bytes as X.X KB, rounded to the nearest tenth (integer math, no bc); non-numeric input such as FAIL passes through.
+fmt_kb() {
+    local val="$1"
+    if [[ "${val}" =~ ^[0-9]+$ ]]; then
+        # 10# forces base 10 (a leading zero would otherwise mean octal); +512 rounds to nearest instead of truncating.
+        local tenths=$(( (10#${val} * 10 + 512) / 1024 ))
+        echo "$((tenths / 10)).$((tenths % 10)) KB"
+    else
+        echo "${val}"
+    fi
+}
+
+# --- Console summary ---
+echo ""
+echo "============================================================"
+echo "FR_math.c code size (.text bytes), compiled with -Os"
+echo "Sorted by architecture width (8-bit → 64-bit)"
+echo "============================================================"
+echo ""
+printf "  %-28s  %5s  %8s  %8s  %8s\n" "Target" "Width" "Lean" "Core" "Full"
+printf "  %-28s  %5s  %8s  %8s  %8s\n" "----------------------------" "-----" "--------" "--------" "--------"
+while IFS=',' read -r target width lean core full; do
+    printf "  %-28s  %4s-b  %8s  %8s  %8s\n" "$target" "$width" "$lean" "$core" "$full"
+done <<< "${SORTED}"
+echo ""
+echo "Lean = -DFR_LEAN -DFR_NO_PRINT  (radian trig, inv trig, log/exp, sqrt)"
+echo "Core = -DFR_CORE_ONLY           (Lean + degree/BAM trig, log10, hypot)"
+echo "Full = default                   (Core + print, waves, ADSR)"
+echo ""
+
+# --- build/sizes.md ---
+{
+    echo "# FR_math.c Code Sizes (.text bytes, -Os)"
+    echo ""
+    echo "Sorted by architecture width (8-bit → 64-bit)."
+    echo ""
+    echo "| Target | Lean | Core | Full |"
+    echo "|--------|-----:|-----:|-----:|"
+    while IFS=',' read -r target width lean core full; do
+        printf "| %s | %s | %s | %s |\n" "$target" "$(fmt_kb "$lean")" "$(fmt_kb "$core")" "$(fmt_kb "$full")"
+    done <<< "${SORTED}"
+    echo ""
+    echo "**Lean** (\`-DFR_LEAN -DFR_NO_PRINT\`): radian trig, inv trig, log/exp, sqrt."
+    echo "**Core** (\`-DFR_CORE_ONLY\`): Lean + degree/BAM trig, log10, hypot."
+    echo "**Full** (default): Core + formatted print, wave generators, ADSR envelope."
+} > build/sizes.md
+
+echo "Wrote build/sizes.csv and build/sizes.md"
+
+if [ "${MODE}" != "update" ]; then
+    exit 0
+fi
+
+# -----------------------------------------------------------------------
+# 4. Patch doc files
+# -----------------------------------------------------------------------
+
+# Build markdown replacement block (width column is for sorting only, omit from output)
+MD_ROWS=""
+while IFS=',' read -r target width lean core full; do
+    row="| ${target} | $(fmt_kb "${lean}") | $(fmt_kb "${core}") | $(fmt_kb "${full}") |"
+    if [ -n "${MD_ROWS}" ]; then
+        MD_ROWS+=$'\n'
+    fi
+    MD_ROWS+="${row}"
+done <<< "${SORTED}"
+
+MD_TABLE="<!-- SIZE_TABLE_START -->"$'\n'
+MD_TABLE+="| Target | Lean | Core | Full |"$'\n'
+MD_TABLE+="|--------|-----:|-----:|-----:|"$'\n'
+MD_TABLE+="${MD_ROWS}"$'\n'
+MD_TABLE+="<!-- SIZE_TABLE_END -->"
+
+# Patch a markdown file between the SIZE_TABLE_START/END sentinels with ${MD_TABLE}
+patch_markdown() {
+    local file="$1"
+    if [ ! -f "$file" ]; then
+        echo "  skip: $file not found" >&2
+        return
+    fi
+
+    # Pass the table through the environment: perl substitutes it verbatim
+    # instead of parsing it as part of the s{}{} expression.
+    SIZE_TABLE_BLOCK="${MD_TABLE}" perl -0777 -i -pe \
+        's{<!-- SIZE_TABLE_START -->.*?<!-- SIZE_TABLE_END -->}{$ENV{SIZE_TABLE_BLOCK}}s' "$file"
+
+    echo "  patched: $file"
+}
+
+# Patch an HTML file between the SIZE_TABLE_START/END sentinels with a <table>
+patch_html() {
+    local file="$1"
+    if [ ! -f "$file" ]; then
+        echo "  skip: $file not found" >&2
+        return
+    fi
+
+    # Build one <tr> per sorted CSV row (width is read but not emitted)
+    local html_rows=""
+    while IFS=',' read -r target width lean core full; do
+        local tr="<tr><td>${target}</td><td>$(fmt_kb "${lean}")</td><td>$(fmt_kb "${core}")</td><td>$(fmt_kb "${full}")</td></tr>"
+        if [ -n "$html_rows" ]; then
+            html_rows+=$'\n'
+        fi
+        html_rows+="${tr}"
+    done <<< "${SORTED}"
+
+    local replacement
+    replacement="<!-- SIZE_TABLE_START -->"$'\n'
+    replacement+="<table>"$'\n'
+    replacement+="<thead><tr><th>Target</th><th>Lean</th><th>Core</th><th>Full</th></tr></thead>"$'\n'
+    replacement+="<tbody>"$'\n'
+    replacement+="${html_rows}"$'\n'
+    replacement+="</tbody>"$'\n'
+    replacement+="</table>"$'\n'
+    replacement+="<!-- SIZE_TABLE_END -->"
+
+    # Hand the block to perl via the environment so it is inserted verbatim;
+    # interpolating it into the program text would let it be parsed as perl.
+    SIZE_TABLE_BLOCK="${replacement}" perl -0777 -i -pe \
+        's{<!-- SIZE_TABLE_START -->.*?<!-- SIZE_TABLE_END -->}{$ENV{SIZE_TABLE_BLOCK}}s' "$file"
+
+    echo "  patched: $file"
+}
+
+echo ""
+echo "Patching doc files..."
+patch_markdown "README.md"
+patch_markdown "docs/building.md"
+patch_html "pages/guide/building.html"
+echo "Done."
diff --git a/scripts/size_report.sh b/scripts/size_report.sh
deleted file mode 100755
index 69c875f..0000000
--- a/scripts/size_report.sh
+++ /dev/null
@@ -1,142 +0,0 @@
-#!/bin/bash
-# Enhanced size report for FR_Math library
-# Builds for multiple architectures and displays a formatted table
-
-set -e
-
-# Colors for output
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-NC='\033[0m' # No Color
-
-# Source and build directories
-SRC_DIR="src"
-BUILD_DIR="build"
-TEMP_DIR="build/size_report"
-
-# Create temp directory for builds
-mkdir -p "$TEMP_DIR"
-
-# Function to build and get size for an architecture
-build_and_size() {
-    local arch=$1
-    local compiler=$2
-    local flags=$3
-    local output_file="$TEMP_DIR/FR_math_${arch}.o"
-    
-    if command -v $compiler >/dev/null 2>&1; then
-        # Try to compile
-        if $compiler $flags -Isrc -Wall -Os -c $SRC_DIR/FR_math.c -o "$output_file" 2>/dev/null; then
-            # Get size in bytes
-            local size=$(wc -c < "$output_file" 2>/dev/null || echo "0")
-            echo "$size"
-        else
-            echo "fail"
-        fi
-    else
-        echo "n/a"
-    fi
-}
-
-# Function to format number with commas
-format_number() {
-    printf "%'d" $1 2>/dev/null || echo $1
-}
-
-echo ""
-echo "========================================="
-echo "     FR_Math Multi-Architecture Size Report"
-echo "========================================="
-echo ""
-echo "Building for all available architectures..."
-echo ""
-
-# Build for each architecture
-x86_32_size=$(build_and_size "x86-32" "gcc" "-m32")
-x86_64_size=$(build_and_size "x86-64" "gcc" "-m64")
-arm32_size=$(build_and_size "arm32" "arm-linux-gnueabihf-gcc" "")
-arm64_size=$(build_and_size "arm64" "aarch64-linux-gnu-gcc" "")
-# Bare-metal Cortex-M (Thumb) targets — toolchain is arm-none-eabi-gcc.
-# Cortex-M0 = Thumb-1 (very dense, no DSP), Cortex-M4 = Thumb-2 (DSP, MAC).
-cm0_size=$(build_and_size "cortex-m0" "arm-none-eabi-gcc" "-mcpu=cortex-m0 -mthumb --specs=nosys.specs")
-cm4_size=$(build_and_size "cortex-m4" "arm-none-eabi-gcc" "-mcpu=cortex-m4 -mthumb --specs=nosys.specs")
-m68k_size=$(build_and_size "m68k" "m68k-elf-gcc" "")
-# RISC-V: try the bare-metal newlib toolchain first, fall back to elf names.
-riscv32_size=$(build_and_size "riscv32" "riscv64-unknown-elf-gcc" "-march=rv32imc -mabi=ilp32")
-if [ "$riscv32_size" = "n/a" ]; then
-    riscv32_size=$(build_and_size "riscv32" "riscv32-unknown-elf-gcc" "")
-fi
-riscv64_size=$(build_and_size "riscv64" "riscv64-unknown-elf-gcc" "-march=rv64imac -mabi=lp64")
-
-# Native build
-native_arch=$(uname -m)
-native_size=$(build_and_size "native" "gcc" "")
-
-# Print formatted table
-printf "┌──────────────┬──────────────┬──────────┐\n"
-printf "│ Architecture │  Compiler    │   Size   │\n"
-printf "├──────────────┼──────────────┼──────────┤\n"
-
-# Function to print a row
-print_row() {
-    local arch=$1
-    local compiler=$2
-    local size=$3
-    
-    if [ "$size" = "n/a" ]; then
-        printf "│ %-12s │ %-12s │ %8s │\n" "$arch" "not found" "    -"
-    elif [ "$size" = "fail" ]; then
-        printf "│ %-12s │ %-12s │ %8s │\n" "$arch" "error" "    -"
-    elif [ "$size" = "0" ]; then
-        printf "│ %-12s │ %-12s │ %8s │\n" "$arch" "$compiler" "    -"
-    else
-        printf "│ %-12s │ %-12s │ %'8d │\n" "$arch" "$compiler" "$size"
-    fi
-}
-
-# Print each architecture
-print_row "x86-32" "gcc -m32" "$x86_32_size"
-print_row "x86-64" "gcc -m64" "$x86_64_size"
-print_row "ARM32" "arm-gcc" "$arm32_size"
-print_row "ARM64" "aarch64-gcc" "$arm64_size"
-print_row "Cortex-M0" "arm-eabi-gcc" "$cm0_size"
-print_row "Cortex-M4" "arm-eabi-gcc" "$cm4_size"
-print_row "68k" "m68k-gcc" "$m68k_size"
-print_row "RISC-V 32" "riscv32-gcc" "$riscv32_size"
-print_row "RISC-V 64" "riscv64-gcc" "$riscv64_size"
-printf "├──────────────┼──────────────┼──────────┤\n"
-print_row "Native($native_arch)" "gcc" "$native_size"
-printf "└──────────────┴──────────────┴──────────┘\n"
-
-# Optimization comparison for native
-if [ "$native_size" != "n/a" ] && [ "$native_size" != "fail" ]; then
-    echo ""
-    echo "Optimization Comparison (Native $native_arch):"
-    echo "────────────────────────────────────────"
-    
-    os_size=$(gcc -Isrc -Wall -Os -c $SRC_DIR/FR_math.c -o "$TEMP_DIR/FR_math_Os.o" 2>/dev/null && wc -c < "$TEMP_DIR/FR_math_Os.o" || echo "0")
-    o2_size=$(gcc -Isrc -Wall -O2 -c $SRC_DIR/FR_math.c -o "$TEMP_DIR/FR_math_O2.o" 2>/dev/null && wc -c < "$TEMP_DIR/FR_math_O2.o" || echo "0")
-    o3_size=$(gcc -Isrc -Wall -O3 -c $SRC_DIR/FR_math.c -o "$TEMP_DIR/FR_math_O3.o" 2>/dev/null && wc -c < "$TEMP_DIR/FR_math_O3.o" || echo "0")
-    o0_size=$(gcc -Isrc -Wall -O0 -c $SRC_DIR/FR_math.c -o "$TEMP_DIR/FR_math_O0.o" 2>/dev/null && wc -c < "$TEMP_DIR/FR_math_O0.o" || echo "0")
-    
-    printf "  -O0 (none):  %'8d bytes\n" $o0_size
-    printf "  -Os (size):  %'8d bytes\n" $os_size
-    printf "  -O2 (speed): %'8d bytes\n" $o2_size
-    printf "  -O3 (max):   %'8d bytes\n" $o3_size
-fi
-
-echo ""
-echo "Note: Install cross-compilers for more architectures:"
-echo "  Ubuntu/Debian:"
-echo "    sudo apt-get install gcc-multilib g++-multilib"
-echo "    sudo apt-get install gcc-arm-linux-gnueabihf"
-echo "    sudo apt-get install gcc-aarch64-linux-gnu"
-echo "    sudo apt-get install gcc-arm-none-eabi      # Cortex-M (Thumb)"
-echo "    sudo apt-get install gcc-riscv64-unknown-elf"
-echo "    sudo apt-get install gcc-m68k-linux-gnu"
-echo ""
-echo "  macOS (via brew):"
-echo "    brew install --cask gcc-arm-embedded         # Cortex-M (Thumb)"
-echo "    brew install arm-gnu-toolchain"
-echo "    brew install riscv-gnu-toolchain"
-echo ""
\ No newline at end of file
diff --git a/scripts/sync_version.sh b/scripts/sync_version.sh
index 4a7f763..2a33525 100755
--- a/scripts/sync_version.sh
+++ b/scripts/sync_version.sh
@@ -18,8 +18,7 @@
 #   src/FR_math.h                — FR_MATH_VERSION string (derived from _HEX)
 #   VERSION                      — plain-text "X.Y.Z" (derived from _HEX)
 #   README.md                    — shields.io version badge
-#   README.md                    — "Current version:" line
-#   pages/assets/site.js         — FR_VERSION constant (docs page header)
+#   pages/version.json           — {"version":"X.Y.Z","hex":"0xMMmmpp"} for site.js
 #   src/FR_math_2D.h             — @version doxygen tag
 #   src/FR_math_2D.cpp           — @version doxygen tag
 #   library.properties           — Arduino Library Manager version
@@ -196,20 +195,29 @@ update_file "README.md version badge" "${PROJECT_ROOT}/README.md" \
     "s|(img\\.shields\\.io/badge/version-)[0-9]+\\.[0-9]+\\.[0-9]+(-[a-z]+\\.svg)|\${1}${VERSION}\${2}|g"
 
 # --------------------------------------------------------------------------
-# 4. README.md — "Current version: X.Y.Z" line in the Version section
+# 4. pages/version.json — machine-readable version for site.js
+#    site.js fetches this at runtime so no hardcoded version in JS.
 # --------------------------------------------------------------------------
-update_file "README.md Current version: line" "${PROJECT_ROOT}/README.md" \
-    "s|(Current version: )[0-9]+\\.[0-9]+\\.[0-9]+|\${1}${VERSION}|g"
-
-# --------------------------------------------------------------------------
-# 5. pages/assets/site.js — FR_VERSION constant
-#    Pattern: var FR_VERSION = 'v2.0.0';
-# --------------------------------------------------------------------------
-update_file "pages/assets/site.js FR_VERSION" "${PROJECT_ROOT}/pages/assets/site.js" \
-    "s|(var FR_VERSION = 'v)[0-9]+\\.[0-9]+\\.[0-9]+(';)|\${1}${VERSION}\${2}|g"
+VER_JSON="${PROJECT_ROOT}/pages/version.json"
+VER_JSON_WANT="{\"version\":\"${VERSION}\",\"hex\":\"${WANT_HEX}\"}"
+VER_JSON_CUR=""
+if [[ -f "${VER_JSON}" ]]; then
+    VER_JSON_CUR=$(cat "${VER_JSON}" | tr -d '[:space:]')
+fi
+VER_JSON_WANT_TRIMMED=$(echo "${VER_JSON_WANT}" | tr -d '[:space:]')
+if [[ "${VER_JSON_CUR}" == "${VER_JSON_WANT_TRIMMED}" ]]; then
+    echo -e "  ${GREEN}ok  ${NC} pages/version.json"
+elif [[ "${MODE}" == "check" ]]; then
+    echo -e "  ${RED}DRIFT${NC} pages/version.json"
+    DRIFT=1
+else
+    echo "${VER_JSON_WANT}" > "${VER_JSON}"
+    echo -e "  ${YELLOW}updated${NC} pages/version.json"
+    CHANGED=1
+fi
 
 # --------------------------------------------------------------------------
-# 6. src/FR_math_2D.h — @version doxygen tag
+# 5. src/FR_math_2D.h — @version doxygen tag
 # --------------------------------------------------------------------------
 update_file "src/FR_math_2D.h @version" "${PROJECT_ROOT}/src/FR_math_2D.h" \
     "s|(\\@version )[0-9]+\\.[0-9]+\\.[0-9]+|\${1}${VERSION}|g"
diff --git a/scripts/update_sizes.sh b/scripts/update_sizes.sh
deleted file mode 100755
index b696edd..0000000
--- a/scripts/update_sizes.sh
+++ /dev/null
@@ -1,158 +0,0 @@
-#!/usr/bin/env bash
-#
-# update_sizes.sh — read build/sizes.csv and patch the size table into
-# README.md, docs/building.md, and pages/guide/building.html.
-#
-# Usage:
-#   scripts/update_sizes.sh            # print table to stdout
-#   scripts/update_sizes.sh --update   # also patch the three doc files
-#
-# The table is delimited by sentinel comments:
-#   <!-- SIZE_TABLE_START -->
-#   ...
-#   <!-- SIZE_TABLE_END -->
-#
-# Exit status: 0 on success, non-zero on missing CSV or extraction failure.
-
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
-cd "${PROJECT_ROOT}"
-
-CSV="build/sizes.csv"
-MODE="print"
-
-for arg in "$@"; do
-    case "$arg" in
-        --update) MODE="update" ;;
-        -h|--help)
-            echo "Usage: scripts/update_sizes.sh [--update]"
-            echo "  (no args)   Read build/sizes.csv, print size table"
-            echo "  --update    Also patch README.md, docs/building.md, pages/guide/building.html"
-            exit 0
-            ;;
-        *) echo "Unknown option: $arg" >&2; exit 1 ;;
-    esac
-done
-
-if [ ! -f "${CSV}" ]; then
-    echo "ERROR: ${CSV} not found. Run docker/build_sizes.sh first." >&2
-    exit 1
-fi
-
-# -----------------------------------------------------------------------
-# 1. Read CSV and sort by width then full_bytes ascending
-# -----------------------------------------------------------------------
-
-# Skip header, sort numerically by field 2 (width) then field 4 (full_bytes)
-SORTED=$(tail -n +2 "${CSV}" | sort -t',' -k2,2n -k4,4n)
-
-if [ -z "${SORTED}" ]; then
-    echo "ERROR: No data rows in ${CSV}" >&2
-    exit 1
-fi
-
-# Build markdown data rows
-MD_ROWS=""
-while IFS=',' read -r target width core full; do
-    # Format bytes as X.X KB
-    fmt_kb() {
-        local val="$1"
-        if [[ "${val}" =~ ^[0-9]+$ ]]; then
-            awk "BEGIN { printf \"%.1f KB\", ${val}/1024.0 }"
-        else
-            echo "${val}"
-        fi
-    }
-    row="| ${target} | $(fmt_kb "${core}") | $(fmt_kb "${full}") |"
-    if [ -n "${MD_ROWS}" ]; then
-        MD_ROWS+=$'\n'
-    fi
-    MD_ROWS+="${row}"
-done <<< "${SORTED}"
-
-# Build full markdown table
-MD_TABLE="<!-- SIZE_TABLE_START -->"$'\n'
-MD_TABLE+="| Target | Core | Full |"$'\n'
-MD_TABLE+="|--------|-----:|-----:|"$'\n'
-MD_TABLE+="${MD_ROWS}"$'\n'
-MD_TABLE+="<!-- SIZE_TABLE_END -->"
-
-echo "${MD_TABLE}"
-
-if [ "${MODE}" != "update" ]; then
-    exit 0
-fi
-
-# -----------------------------------------------------------------------
-# 2. Patch markdown files
-# -----------------------------------------------------------------------
-patch_markdown() {
-    local file="$1"
-    if [ ! -f "$file" ]; then
-        echo "  skip: $file not found" >&2
-        return
-    fi
-
-    perl -0777 -i -pe "
-        s{<!-- SIZE_TABLE_START -->.*?<!-- SIZE_TABLE_END -->}
-         {${MD_TABLE}}s
-    " "$file"
-
-    echo "  patched: $file" >&2
-}
-
-patch_markdown "README.md"
-patch_markdown "docs/building.md"
-
-# -----------------------------------------------------------------------
-# 3. Patch HTML file (pages/guide/building.html)
-# -----------------------------------------------------------------------
-patch_html() {
-    local file="$1"
-    if [ ! -f "$file" ]; then
-        echo "  skip: $file not found" >&2
-        return
-    fi
-
-    # Convert sorted CSV rows to HTML <tr> rows
-    local html_rows=""
-    while IFS=',' read -r target width core full; do
-        fmt_kb() {
-            local val="$1"
-            if [[ "${val}" =~ ^[0-9]+$ ]]; then
-                awk "BEGIN { printf \"%.1f KB\", ${val}/1024.0 }"
-            else
-                echo "${val}"
-            fi
-        }
-        local tr="<tr><td>${target}</td><td>$(fmt_kb "${core}")</td><td>$(fmt_kb "${full}")</td></tr>"
-        if [ -n "$html_rows" ]; then
-            html_rows+=$'\n'
-        fi
-        html_rows+="${tr}"
-    done <<< "${SORTED}"
-
-    # Build the replacement block
-    local replacement
-    replacement="<!-- SIZE_TABLE_START -->"$'\n'
-    replacement+="<table>"$'\n'
-    replacement+="<thead><tr><th>Target</th><th>Core</th><th>Full</th></tr></thead>"$'\n'
-    replacement+="<tbody>"$'\n'
-    replacement+="${html_rows}"$'\n'
-    replacement+="</tbody>"$'\n'
-    replacement+="</table>"$'\n'
-    replacement+="<!-- SIZE_TABLE_END -->"
-
-    perl -0777 -i -pe "
-        s{<!-- SIZE_TABLE_START -->.*?<!-- SIZE_TABLE_END -->}
-         {${replacement}}s
-    " "$file"
-
-    echo "  patched: $file" >&2
-}
-
-patch_html "pages/guide/building.html"
-
-echo "Size table updated in all doc files." >&2
diff --git a/src/FR_math.c b/src/FR_math.c
index 181972e..95809f8 100644
--- a/src/FR_math.c
+++ b/src/FR_math.c
@@ -30,164 +30,646 @@
  */
 
 #include "FR_math.h"
-#include "FR_trig_table.h"
 
 #ifndef FR_NO_STDINT
 #include <stdint.h>
 #endif
 
 /*=======================================================
- * BAM-native trig: fr_cos_bam, fr_sin_bam, fr_cos, fr_sin, fr_tan
+ * Trig lookup tables (inlined — no separate header needed)
  *
- * Internal model: every angle is reduced to a u16 BAM value. The top 2 bits
- * select the quadrant, the bottom 14 bits are the in-quadrant position. Odd
- * quadrants (1, 3) reverse the in-quadrant index so the table is always read
- * in the same direction. Quadrants 1 and 2 get their sign flipped at the
- * end.
- *
- * Within each quadrant, the upper FR_TRIG_TABLE_BITS bits of the
- * in-quadrant value index the table; the lower FR_TRIG_FRAC_BITS bits drive
- * round-to-nearest linear interpolation between adjacent table entries.
- *
- * The last entry (table[FR_TRIG_TABLE_SIZE-1] = 0) means the
- * interpolation at the very edge of the quadrant never reads out of bounds.
- *
- * Rounding: we interpolate as
- *     v = lo - ((d * frac + HALF) >> FRAC_BITS)
- * where d = lo - hi (which is >= 0 because cos is monotonically decreasing
- * on [0, pi/2]). Using the subtract form guarantees the argument of >> is
- * always non-negative, so the behavior is portable C89 (no reliance on
- * implementation-defined right-shift of negative integers) and the +HALF
- * gives unambiguous round-half-up. Max error vs the true cos is ~1 LSB of
- * s0.15 (~3e-5 absolute); mean error ~0 (no bias).
+ * Sine quadrant table: 129 entries covering [0, pi/2] in u0.15 format.
+ * Tangent octant table: 65 entries covering [0, pi/4] in u0.15 format.
+ * Generated by tools/coef-gen.py — do not hand-edit.
  */
-s32 fr_cos_bam(u16 bam)
+
+#define FR_TRIG_TABLE_BITS  (7)
+#define FR_TRIG_TABLE_SIZE  ((1 << FR_TRIG_TABLE_BITS) + 1)
+
+#define FR_TRIG_FRAC_BITS   (14 - FR_TRIG_TABLE_BITS)
+#define FR_TRIG_FRAC_MAX    (1 << FR_TRIG_FRAC_BITS)
+#define FR_TRIG_FRAC_MASK   (FR_TRIG_FRAC_MAX - 1)
+#define FR_TRIG_FRAC_HALF   (FR_TRIG_FRAC_MAX >> 1)
+#define FR_TRIG_QUADRANT    (1 << 14)
+
+static const unsigned short gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE] = {  /* [i] ≈ 32768 * sin(i*pi/256), i = 0..128 */
+        0,   402,   804,  1206,  1608,  2009,  2411,  2811,
+     3212,  3612,  4011,  4410,  4808,  5205,  5602,  5998,
+     6393,  6787,  7180,  7571,  7962,  8351,  8740,  9127,
+     9512,  9896, 10279, 10660, 11039, 11417, 11793, 12167,
+    12540, 12910, 13279, 13646, 14010, 14373, 14733, 15091,
+    15447, 15800, 16151, 16500, 16846, 17190, 17531, 17869,
+    18205, 18538, 18868, 19195, 19520, 19841, 20160, 20475,
+    20788, 21097, 21403, 21706, 22006, 22302, 22595, 22884,
+    23170, 23453, 23732, 24008, 24279, 24548, 24812, 25073,
+    25330, 25583, 25833, 26078, 26320, 26557, 26791, 27020,
+    27246, 27467, 27684, 27897, 28106, 28311, 28511, 28707,
+    28899, 29086, 29269, 29448, 29622, 29792, 29957, 30118,
+    30274, 30425, 30572, 30715, 30853, 30986, 31114, 31238,
+    31357, 31471, 31581, 31686, 31786, 31881, 31972, 32058,
+    32138, 32214, 32286, 32352, 32413, 32470, 32522, 32568,
+    32610, 32647, 32679, 32706, 32729, 32746, 32758, 32766,
+    32768
+};
+
+#define FR_TAN_TABLE_BITS  (6)
+#define FR_TAN_TABLE_SIZE  ((1 << FR_TAN_TABLE_BITS) + 1)
+#define FR_TAN_FRAC_BITS   (13 - FR_TAN_TABLE_BITS)
+#define FR_TAN_FRAC_MAX    (1 << FR_TAN_FRAC_BITS)
+#define FR_TAN_FRAC_MASK   (FR_TAN_FRAC_MAX - 1)
+#define FR_TAN_FRAC_HALF   (FR_TAN_FRAC_MAX >> 1)
+#define FR_TAN_OCTANT      (1 << 13)
+
+static const unsigned short gFR_TAN_TAB_O[FR_TAN_TABLE_SIZE] = {  /* [i] ≈ 32768 * tan(i*pi/256), i = 0..64 */
+        0,   402,   804,  1207,  1610,  2013,  2417,  2822,
+     3227,  3634,  4042,  4450,  4861,  5272,  5686,  6101,
+     6518,  6937,  7358,  7782,  8208,  8637,  9068,  9503,
+     9940, 10381, 10825, 11273, 11725, 12180, 12640, 13104,
+    13573, 14046, 14525, 15009, 15498, 15993, 16494, 17001,
+    17515, 18035, 18563, 19098, 19640, 20191, 20750, 21318,
+    21895, 22481, 23078, 23685, 24302, 24931, 25572, 26226,
+    26892, 27572, 28266, 28975, 29699, 30440, 31198, 31973,
+    32768
+};
+
+/*=======================================================
+ * Full-precision radian/degree → BAM conversion helpers
+ *
+ * rad_to_bam_full(r) returns a full s32 BAM value where:
+ *   upper 16 bits = integer BAM (the u16 table index)
+ *   lower 16 bits = sub-BAM fractional part
+ * Input r must already be normalized to radix 16 and reduced to [-pi, pi].
+ *
+ * The shift terms match FR_RAD2BAM (10 terms, ~21-bit accuracy) but are
+ * reordered so intermediate sums stay within s32 for |r| <= pi at r16.
+ */
+static s32 rad_to_bam_full(s32 r)
 {
-	u32 q     = ((u32)bam >> 14) & 0x3;                /* top 2 bits = quadrant */
-	u32 inq   = (u32)bam & (FR_TRIG_QUADRANT - 1);     /* bottom 14 bits        */
-	u32 idx, frac;
-	s32 lo, hi, d, v;
+    /* Scale r by 65536/(2*pi) ≈ 10430.37835 using 10 shift terms:
+     * 2^13 + 2^11 + 2^7 + 2^6 - 2 + 0.5 - 0.125 + 2^-8 - 2^-11 - 2^-14
+     * = 10430.378357 (~21-bit accuracy).  Negative terms are interleaved
+     * early so each partial sum stays within s32 for |r| <= pi at r16
+     * (max result ≈ 2^31 - 4K).  NOTE(review): shifts a possibly negative r. */
+    return (r<<13)-(r<<1)+(r<<11)-(r>>3)+(r<<7)+(r<<6)+(r>>1)+(r>>8)-(r>>11)-(r>>14);
+}
 
-	/* Exact cardinal angles: bam=0 → 1.0, bam=16384 → 0, etc. */
-	if (inq == 0)
-	{
-		if (q == 0) return  FR_TRIG_ONE;   /*   0° →  1.0 */
-		if (q == 2) return -FR_TRIG_ONE;   /* 180° → -1.0 */
-		return 0;                          /*  90° or 270° → 0 */
-	}
+#ifndef FR_LEAN
+/* deg_to_bam_full(d) — degree counterpart of rad_to_bam_full.
+ * d must already be at radix 16 and reduced to [-90, 90).  Returns a full
+ * s32 BAM (upper 16 = integer BAM, lower 16 = sub-BAM).  Scales by 65536/360
+ * ≈ 128+64-8-2+1/32+1/64-1/512 (7 terms, ~18-bit accuracy, as FR_DEG2BAM). */
+static s32 deg_to_bam_full(s32 d)
+{
+    return (d<<7)+(d<<6)-(d<<3)-(d<<1)+(d>>5)+(d>>6)-(d>>9);
+}
+#endif
 
-	if (q == 1 || q == 3)
-		inq = FR_TRIG_QUADRANT - inq;                  /* mirror across pi/2    */
+/* Rescale a fixed-point value from `radix` fractional bits to 16. */
+static s32 normalize_to_r16(s32 val, u16 radix)
+{
+    if (radix > 16) return val >> (radix - 16);   /* drop excess fraction bits */
+    if (radix < 16) return val << (16 - radix);   /* widen up to radix 16      */
+    return val;                                   /* already at radix 16       */
+}
 
-	idx  = inq >> FR_TRIG_FRAC_BITS;                   /* table index [0..SIZE-1] */
-	frac = inq &  FR_TRIG_FRAC_MASK;                   /* interp fraction       */
-	lo = gFR_COS_TAB_Q[idx];
-	hi = gFR_COS_TAB_Q[idx + 1];
-	d  = lo - hi;                                      /* >= 0: cos monotonic   */
-	v  = lo - (((d * (s32)frac) + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS);
+/* Reduce non-negative radian (at r16) to [0, 2*pi); an exact 2*pi maps to 0. */
+static s32 reduce_to_2pi(s32 r)
+{
+    const s32 two_pi = FR_TWO_PI(16);  /* 411775 */
+    if (r >= (two_pi << 1))
+        r -= (r / two_pi) * two_pi;    /* two or more turns: keep the remainder */
+    else if (r >= two_pi)
+        r -= two_pi;                   /* one turn: a single subtract suffices  */
+    return r;
+}
 
-	/* Shift s0.15 → s15.16 */
-	v <<= 1;
 
-	return (q == 1 || q == 2) ? -v : v;
+/* rad_r16_to_bam — map a radix-16 radian angle in [0, 2π) to u16 BAM.
+ * The angle is first folded into [-π/2, π/2) so rad_to_bam_full stays in
+ * its overflow-safe range; the folded-out half turn is re-added in BAM. */
+static u16 rad_r16_to_bam(s32 r)
+{
+    u16 half_turn = 0;      /* BAM offset restored after the fold */
+    s32 full;               /* 16.16 BAM from rad_to_bam_full     */
+
+    if (r >= FR_THREE_HALF_PI(16)) {
+        /* [3π/2, 2π): subtract a full turn; u16 wrap absorbs it. */
+        r -= FR_TWO_PI(16);
+    } else if (r >= FR_HALF_PI(16)) {
+        /* [π/2, 3π/2): subtract π here, add 0x8000 (180°) in BAM. */
+        r -= FR_PI(16);
+        half_turn = 0x8000u;
+    }
+    full = rad_to_bam_full(r);
+    /* Round to nearest integer BAM, then restore the half turn. */
+    return (u16)(half_turn + (u16)((full + (1 << 15)) >> 16));
 }
 
-s32 fr_sin_bam(u16 bam)
+/* (rad_r16_to_bam32 removed — sub-BAM interpolation approach abandoned) */
+
+/* fr_rad_to_bam — radians at `radix` fractional bits → u16 BAM (65536 per
+ * turn). Rescales to r16, wraps into [0, 2π), then calls rad_r16_to_bam. */
+u16 fr_rad_to_bam(s32 rad, u16 radix)
 {
-	/* sin(x) = cos(x - pi/2) = cos(bam - 16384). The u16 wraparound makes
-	 * this completely free.
-	 */
-	return fr_cos_bam((u16)(bam - FR_BAM_QUADRANT));
+    s32 r = normalize_to_r16(rad, radix);
+    /* Negative input: add back whole turns, then one more if still below 0 */
+    if (r < 0) {
+        r += ((-r) / FR_TWO_PI(16)) * FR_TWO_PI(16);   /* now in (-2π, 0] */
+        if (r < 0) r += FR_TWO_PI(16);
+    }
+    r = reduce_to_2pi(r);   /* handles non-negative inputs of one turn or more */
+    return rad_r16_to_bam(r);
+}
+
+#ifndef FR_LEAN
+/* fr_deg_to_bam — degrees (any radix) to u16 BAM without s32 overflow.
+ * Rescales to r16, folds into [-90, 90), and re-adds the fold as BAM. */
+u16 fr_deg_to_bam(s32 deg, u16 radix)
+{
+    u16 half_turn = 0;                 /* 0x8000 BAM when folded by 180° */
+    s32 d = normalize_to_r16(deg, radix);
+
+    /* Drop whole turns (remainder keeps d's sign), wrap to [-180, 180). */
+    d %= FR_D360_R16;
+    if (d >= FR_D180_R16)
+        d -= FR_D360_R16;
+    else if (d < -FR_D180_R16)
+        d += FR_D360_R16;
+
+    /* Fold into [-90, 90); a 180° fold is a half turn in BAM. */
+    if (d >= FR_D90_R16 || d < -FR_D90_R16) {
+        d += (d < 0) ? FR_D180_R16 : -FR_D180_R16;
+        half_turn = 32768;
+    }
+    return (u16)(half_turn + (u16)((deg_to_bam_full(d) + (1 << 15)) >> 16));
 }
+#endif
 
-/* Convert radians at given radix to BAM with rounding.
- * One radian = 65536 / (2*pi) ≈ 10430.378 BAM units.
- * We use the more precise scaled constant 10430378 / 1000 to keep error
- * bounded across a wide range of radians.
+/*=======================================================
+ * BAM-native trig: fr_sin_bam, fr_cos_bam, fr_cos, fr_sin, fr_tan
+ *
+ * Internal model: every angle is reduced to a u16 BAM value. The top 2 bits
+ * select the quadrant, the bottom 14 bits are the in-quadrant position. Odd
+ * quadrants (1, 3) reverse the in-quadrant index so the table is always read
+ * in the same direction.
+ *
+ * The table is a 129-entry SINE quadrant (ascending: 0 at index 0, 32768 at
+ * index 128). After mirroring, small full_pos → small output (near zero),
+ * which enables a cheap small-angle approximation: sin(θ) ≈ θ for angles
+ * below one table step (~0.7°). This eliminates table quantization error
+ * in the region where it matters most.
+ *
+ * Sign rule: quadrants 2 and 3 negate the result.
+ * Mirror rule: quadrants 1 and 3 flip the in-quadrant position.
  */
-static u16 fr_rad_to_bam(s32 rad, u16 radix)
+s32 fr_sin_bam(u16 bam)
 {
-	int64_t scaled = ((int64_t)rad * 10430378LL) / 1000;
-	if (radix > 0)
-		scaled >>= radix;
-	return (u16)((u32)scaled & 0xffff);
+	u32 q   = ((u32)bam >> 14) & 0x3;              /* top 2 bits = quadrant */
+	u32 inq = (u32)bam & (FR_TRIG_QUADRANT - 1);  /* bottom 14 bits        */
+
+	/* Exact cardinal angles: returned before any table or rounding step */
+	if (inq == 0) {
+		if (q == 0 || q == 2) return 0;            /*   0° or 180° → 0   */
+		if (q == 1) return  FR_TRIG_ONE;           /*  90° →  1.0        */
+		return -FR_TRIG_ONE;                       /* 270° → -1.0        */
+	}
+
+	/* Odd quadrants mirror: inq becomes the distance to the next zero of sin */
+	if (q == 1 || q == 3)
+		inq = FR_TRIG_QUADRANT - inq;
+
+	s32 v;                                         /* |sin| at s15.16    */
+
+	/* Small-angle approximation: sin(θ) ≈ θ for inq < 128 (one table step).
+	 * θ_rad = inq * (π/2) / 16384, so s15.16 output = inq * 102944 / 16384
+	 * (102944 ≈ π/2 at r16), rounded via +8192. Max 127 → 798; θ³/6 < 3e-7. */
+	if (inq < FR_TRIG_FRAC_MAX) {
+		v = (s32)(((u32)inq * 102944u + 8192u) >> 14);
+	} else {
+		/* Table lookup + 7-bit lerp; inq < 0x4000 so idx + 1 <= 128 is in range */
+		u32 idx  = inq >> FR_TRIG_FRAC_BITS;
+		u32 frac = inq &  FR_TRIG_FRAC_MASK;
+		s32 lo = (s32)gFR_SIN_TAB_Q[idx];
+		s32 hi = (s32)gFR_SIN_TAB_Q[idx + 1];
+		v = lo + (((hi - lo) * (s32)frac + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS);
+		v <<= 1;   /* u0.15 → s15.16 */
+	}
+
+	return (q >= 2) ? -v : v;                      /* 180°..360°: sin is negative */
 }
 
-s32 fr_cos(s32 rad, u16 radix)
+s32 fr_cos_bam(u16 bam)
 {
-	return fr_cos_bam(fr_rad_to_bam(rad, radix));
+	u16 shifted = (u16)(bam + FR_BAM_QUADRANT);  /* cos(x) = sin(x + pi/2); the u16 cast wraps past 360° */
+	return fr_sin_bam(shifted);
 }
 
-s32 fr_sin(s32 rad, u16 radix)
+s32 fr_cos(s32 rad, u16 radix)
 {
-	return fr_sin_bam(fr_rad_to_bam(rad, radix));
+	if (rad == 0) return FR_TRIG_ONE;              /* cos(0) is exactly 1.0 */
+	s32 r = normalize_to_r16(rad, radix);
+	if (r < 0) r = -r;                             /* cos is even: use |r| */
+	r = reduce_to_2pi(r);
+	/* Near π/2 or 3π/2 (cos=0 crossings): cos(π/2+δ) = -sin(δ) ≈ -δ,
+	 * cos(3π/2+δ) = sin(δ) ≈ δ.  δ is in r16 LSBs, the same scale as the output. */
+	s32 delta = r - FR_HALF_PI(16);
+	if (delta >= -256 && delta <= 256)             /* within 256/65536 rad of π/2 */
+		return -delta;
+	delta = r - FR_THREE_HALF_PI(16);
+	if (delta >= -256 && delta <= 256)             /* within 256/65536 rad of 3π/2 */
+		return delta;
+	return fr_cos_bam(fr_rad_to_bam(rad, radix));  /* general case: convert the original input to BAM */
 }
 
-/* fr_tan: returns sin/cos at s15.16 (radix 16). Saturates if cos is near zero. */
-s32 fr_tan(s32 rad, u16 radix)
+s32 fr_sin(s32 rad, u16 radix)
 {
-	u16 bam = fr_rad_to_bam(rad, radix);
-	s32 s   = fr_sin_bam(bam);
-	s32 c   = fr_cos_bam(bam);
-	if (c == 0)
-		return (s >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
-	return (s32)(((int64_t)s << FR_TRIG_OUT_PREC) / c);
+	if (rad == 0) return 0;                        /* sin(0) is exactly 0 */
+	s32 r = normalize_to_r16(rad, radix);
+	s32 sign = 1;
+	if (r < 0) { r = -r; sign = -1; }              /* sin is odd: work on |r|, restore sign on return */
+	r = reduce_to_2pi(r);
+	/* Near 0 after reduction: sin(δ) ≈ δ (δ in r16 LSBs = output scale) */
+	if (r < 256) {
+		s32 v = r;
+		return (sign < 0) ? -v : v;
+	}
+	/* Near π: sin(π + δ) = -sin(δ) ≈ -δ */
+	s32 delta = r - FR_PI(16);
+	if (delta >= -256 && delta <= 256) {
+		s32 v = -delta;
+		return (sign < 0) ? -v : v;
+	}
+	/* Near 2π: sin(2π - δ) = -sin(δ) ≈ -δ, where δ = 2π - r */
+	delta = FR_TWO_PI(16) - r;
+	if (delta >= 0 && delta < 256) {
+		s32 v = -delta;
+		return (sign < 0) ? -v : v;
+	}
+	/* Main path: reduce to [-π, π], convert to u16 BAM, table lookup */
+	if (r > FR_PI(16)) r -= FR_TWO_PI(16);
+	u16 bam = (u16)((rad_to_bam_full(r) + (1 << 15)) >> 16);  /* +0.5 BAM rounds to nearest; u16 cast wraps negatives */
+	s32 v = fr_sin_bam(bam);
+	return (sign < 0) ? -v : v;
 }
 
+#ifndef FR_LEAN
 /*=======================================================
- * Integer-degree and fixed-radix-degree trig wrappers
+ * BAM-native tangent: fr_tan_bam
  *
- * FR_CosI / FR_SinI are macros in the header (zero cost). The fixed-radix
- * variants here convert s.r degrees to BAM in one shot using a precomputed
- * reciprocal of 360 to avoid division on multiply-poor cores like 8051.
+ * Uses a 65-entry octant table (gFR_TAN_TAB_O) for the first octant
+ * [0, 45°] and the reciprocal identity tan(x) = 1/tan(90°-x) for the
+ * second octant (45°, 90°). Result is s15.16 with saturation at the
+ * poles.
  *
- * Math: bam = deg * (65536 / 360) = deg * 182.0444...
- * In s.16 fixed point: 65536 / 360 = 0xB60B (rounded). So
- *   bam_u16 = (deg_s.r * 0xB60B) >> r
- * gives bam in u16 BAM units. The constant 0xB60B contains the divide by
- * 360 baked in; the shift `>> r` strips the input radix.
+ * No 64-bit intermediates. One 32-bit division only in the >45° path.
  */
-static u16 fr_deg_radix_to_bam(s16 deg, u16 radix)
+s32 fr_tan_bam(u16 bam)
 {
-	/* 0xB60B ≈ (65536/360) * 256 — the ×256 prescale keeps 32-bit math
-	 * friendly to 8051-class MCUs.  We must shift out both the input
-	 * fraction bits (radix) AND the 8-bit prescale, hence radix + 8.
-	 * The +half term rounds to nearest, matching FR_DEG2BAM behaviour.
-	 */
-	s32  v     = (s32)deg * 0xB60BL;
-	u16  shift = radix + 8;
-	return (u16)((u32)((v + (1L << (shift - 1))) >> shift) & 0xffff);
+	u32 q   = ((u32)bam >> 14) & 0x3;          /* quadrant (top 2 bits) */
+	u32 inq = (u32)bam & 0x3FFFu;              /* in-quadrant (14 bits) */
+	s32 sign = 1;
+	u32 idx, frac;
+	s32 lo, hi, raw;
+
+	/* Exact zeros: bam lands exactly on 0° or 180° */
+	if (inq == 0 && (q == 0 || q == 2))
+		return 0;
+
+	/* Poles: bam lands exactly on 90° or 270° */
+	if (inq == 0 && (q == 1 || q == 3))
+		return (q == 1) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+
+	/* Q1 (90°..180°) and Q3 (270°..360°): reflect and negate */
+	if (q == 1 || q == 3) {
+		inq = 0x4000u - inq;
+		sign = -1;
+	}
+
+	/* Now inq is in (0, 0x4000) = (0°, 90°) exclusive.
+	 * Split into first octant [0, 45°) and second octant [45°, 90°). */
+	if (inq < FR_TAN_OCTANT) {
+		/* First octant: direct table lookup + lerp.
+		 * inq is 13 bits; top FR_TAN_TABLE_BITS index the table,
+		 * bottom FR_TAN_FRAC_BITS drive interpolation (idx <= 63 here). */
+		idx  = inq >> FR_TAN_FRAC_BITS;
+		frac = inq &  FR_TAN_FRAC_MASK;
+		lo = (s32)gFR_TAN_TAB_O[idx];
+		hi = (s32)gFR_TAN_TAB_O[idx + 1];
+		raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+
+		if (raw < 0x40) {
+			/* Near zero: redo interpolation with 4 extra bits of
+			 * precision to reduce rounding error when result is small. */
+			s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4;
+			s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4;
+			raw = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+			raw = (raw + 4) >> 3;        /* u0.19 → s15.16 with rounding */
+		} else {
+			raw <<= 1;                   /* u0.15 → s15.16              */
+		}
+	} else {
+		/* Second octant: tan(x) = 1 / tan(90° - x).  comp is in
+		 * (0, 0x2000]; at exactly 45° idx == 64 (last entry), frac == 0. */
+		u32 comp = 0x4000u - inq;
+
+		/* Look up tan(comp); entry idx + 1 is only read when frac != 0 */
+		idx  = comp >> FR_TAN_FRAC_BITS;
+		frac = comp &  FR_TAN_FRAC_MASK;
+		lo = (s32)gFR_TAN_TAB_O[idx];
+		hi = (frac != 0u) ? (s32)gFR_TAN_TAB_O[idx + 1] : lo;  /* avoids reading [65] at 45° */
+		raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+
+		if (raw < 0x40) {
+			/* Near pole: redo interpolation with 4 extra bits of
+			 * precision. The reciprocal amplifies small interpolation
+			 * errors, so extra precision significantly helps here.
+			 * Result: (2^31 / raw_hp) << 4 = 2^35 / raw_hp. */
+			s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4;
+			s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4;
+			s32 raw_hp = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+			if (raw_hp < 32) {
+				raw = FR_TRIG_MAXVAL;
+			} else {
+				raw = (s32)((0x80000000u / (u32)raw_hp) << 4);
+			}
+		} else {
+			raw = (s32)(0x80000000u / (u32)raw);
+		}
+	}
+
+	return (sign < 0) ? -raw : raw;
 }
+#endif /* FR_LEAN */
 
-s32 FR_Cos(s16 deg, u16 radix)
+/* fr_tan — tangent of `rad` (radians, `radix` fractional bits); s15.16 out.
+ *
+ * tan is odd, so the sign is stripped and |rad| reduced to [0, 2π].
+ * Within 256 r16 LSBs of 0, π or 2π: tan(x) ≈ x.  Within 2048 r16 LSBs of
+ * π/2 or 3π/2: cot(δ) ≈ 1/δ from the radian distance (avoids amplifying
+ * BAM quantization), saturating at ±FR_TRIG_MAXVAL.  Otherwise: rad → u16
+ * BAM, then octant table lookup + interpolation (as in fr_tan_bam). */
+s32 fr_tan(s32 rad, u16 radix)
 {
-	return fr_cos_bam(fr_deg_radix_to_bam(deg, radix));
+	if (rad == 0) return 0;
+	s32 r = normalize_to_r16(rad, radix);
+
+	/* tan(-x) = -tan(x): extract sign, work with |r| */
+	s32 sign = 1;
+	if (r < 0) { r = -r; sign = -1; }
+	r = reduce_to_2pi(r);
+
+	/* Small-angle bypass at zero crossings: tan(δ) ≈ δ */
+	if (r < 256)
+		return (sign < 0) ? -r : r;
+	{
+		s32 delta = r - FR_PI(16);
+		if (delta >= -256 && delta <= 256)
+			return (sign < 0) ? -delta : delta;
+	}
+	{
+		s32 delta = FR_TWO_PI(16) - r;
+		if (delta >= 0 && delta < 256)
+			return (sign < 0) ? delta : -delta;
+	}
+
+	/* Near-pole bypass: within POLE_THRESH r16 of π/2 or 3π/2,
+	 * use cot(δ) ≈ 1/δ from the radian distance directly.
+	 * Compute δ at r24 using precise pole constants (8 more fractional
+	 * bits than the r16 FR_HALF_PI/FR_THREE_HALF_PI constants).
+	 * At δ=2048 r16 (1.79°), 1/δ error is ~0.03%. */
+	{
+		const s32 pole_thresh = 2048;       /* r16 units (~1.79°) */
+		/* Precise pole positions at r24:
+		 * π/2  × 2^24 = 26353589.76 → 26353590
+		 * 3π/2 × 2^24 = 79060769.28 → 79060769 */
+		const s32 half_pi_r24       = 26353590;
+		const s32 three_half_pi_r24 = 79060769;
+
+		s32 d1 = r - FR_HALF_PI(16);       /* coarse check at r16 */
+		s32 d2 = r - FR_THREE_HALF_PI(16);
+		s32 pole_delta_r24 = 0;
+
+		if (d1 >= -pole_thresh && d1 <= pole_thresh) {
+			s32 r24 = r << 8;
+			s32 dd = r24 - half_pi_r24;
+			pole_delta_r24 = (dd < 0) ? -dd : dd;
+		} else if (d2 >= -pole_thresh && d2 <= pole_thresh) {
+			s32 r24 = r << 8;
+			s32 dd = r24 - three_half_pi_r24;
+			pole_delta_r24 = (dd < 0) ? -dd : dd;
+		}
+
+		if (pole_delta_r24 > 0) {
+			/* Determine sign from radian quadrant */
+			s32 pole_sign;
+			if (r < FR_HALF_PI(16))
+				pole_sign = 1;          /* before π/2: → +∞ */
+			else if (r < FR_PI(16))
+				pole_sign = -1;         /* past π/2: → -∞ */
+			else if (r <= FR_THREE_HALF_PI(16))
+				pole_sign = 1;          /* before 3π/2: → +∞ */
+			else
+				pole_sign = -1;         /* past 3π/2: → -∞ */
+
+			s32 raw;
+			if (pole_delta_r24 < 512) {
+				raw = FR_TRIG_MAXVAL;   /* δ < 2 at r16 → saturate */
+			} else {
+				/* cot(δ) ≈ 1/δ.  In s15.16: (2^40) / δ_r24 */
+				raw = (s32)((1ULL << 40) / (u32)pole_delta_r24);
+				if (raw > FR_TRIG_MAXVAL) raw = FR_TRIG_MAXVAL;
+			}
+			s32 v = (pole_sign < 0) ? -raw : raw;
+			return (sign < 0) ? -v : v;
+		}
+	}
+
+	/* Convert radian to u16 BAM */
+	u16 bam = rad_r16_to_bam(r);
+
+	/* Decompose BAM into quadrant + in-quadrant */
+	u32 q   = ((u32)bam >> 14) & 0x3;
+	u32 inq = (u32)bam & 0x3FFFu;
+	s32 tsign = 1;  /* tan sign from quadrant */
+
+	/* Exact zeros: bam lands on 0° or 180° */
+	if (inq == 0 && (q == 0 || q == 2))
+		return 0;
+
+	/* Q1/Q3: reflect and negate */
+	if (q == 1 || q == 3) {
+		inq = 0x4000u - inq;
+		tsign = -1;
+	}
+
+	/* Octant table lookup + interpolation (same logic as fr_tan_bam) */
+	u32 idx, frac;
+	s32 raw;
+
+	if (inq < FR_TAN_OCTANT) {
+		/* First octant [0°, 45°): direct lookup */
+		idx  = inq >> FR_TAN_FRAC_BITS;
+		frac = inq &  FR_TAN_FRAC_MASK;
+		s32 lo = (s32)gFR_TAN_TAB_O[idx];
+		s32 hi = (s32)gFR_TAN_TAB_O[idx + 1];
+		raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+
+		if (raw < 0x40) {
+			s32 lo4 = lo << 4;
+			s32 hi4 = hi << 4;
+			raw = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+			raw = (raw + 4) >> 3;
+		} else {
+			raw <<= 1;
+		}
+	} else {
+		/* Second octant [45°, 90°): reciprocal identity.  At exactly 45° idx == 64, frac == 0. */
+		u32 comp = 0x4000u - inq;
+		idx  = comp >> FR_TAN_FRAC_BITS;
+		frac = comp &  FR_TAN_FRAC_MASK;
+		s32 lo = (s32)gFR_TAN_TAB_O[idx];
+		s32 hi = (frac != 0u) ? (s32)gFR_TAN_TAB_O[idx + 1] : lo;  /* avoids reading [65] at 45° */
+		raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+
+		if (raw < 0x40) {
+			s32 lo4 = lo << 4;
+			s32 hi4 = hi << 4;
+			s32 raw_hp = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+			if (raw_hp < 32)
+				raw = FR_TRIG_MAXVAL;
+			else
+				raw = (s32)((0x80000000u / (u32)raw_hp) << 4);
+		} else {
+			raw = (s32)(0x80000000u / (u32)raw);
+		}
+	}
+
+	/* Combine quadrant sign and input sign */
+	s32 v = (tsign < 0) ? -raw : raw;
+	return (sign < 0) ? -v : v;
+}
+
+#ifndef FR_LEAN
+/*=======================================================
+ * Degree-input trig: convert to u16 BAM via fr_deg_to_bam, then
+ * call the BAM-native functions. Cardinal angles are exact.
+ */
+
+s32 fr_cos_deg(s32 deg, u16 radix)
+{
+	if (radix == 0) return fr_cos_bam(FR_DEG2BAM_I(deg));   /* no fraction bits: direct conversion */
+	if (deg < 0) deg = -deg;                                /* cos is even */
+	/* Exact cardinal angles: whole-degree input that is a multiple of 90° */
+	s32 frac_mask = (1 << radix) - 1;
+	if ((deg & frac_mask) == 0) {
+		s32 rem = (deg >> radix) % 360;
+		if (rem == 0)   return  FR_TRIG_ONE;
+		if (rem == 90)  return  0;
+		if (rem == 180) return -FR_TRIG_ONE;
+		if (rem == 270) return  0;
+	}
+	/* Near 90° or 270° (cos=0 crossings): cos(90+δ) = -sin(δ) ≈ -δ·π/180,
+	 * cos(270+δ) = sin(δ) ≈ δ·π/180. Avoids BAM rounding error at zero. */
+	s32 d = normalize_to_r16(deg, radix);
+	if (d >= FR_D360_R16) { s32 n = d / FR_D360_R16; d -= n * FR_D360_R16; }   /* d is now in [0, 360°) */
+	{
+		const s32 DEG_THRESH = 14000; /* ~0.21° at r16 */
+		s32 delta = d - FR_D90_R16;
+		if (delta >= -DEG_THRESH && delta <= DEG_THRESH) {
+			s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16);   /* δ deg → rad, rounded; presumably FR_kDEG2RAD is π/180 at r16 — confirm */
+			return -dr;
+		}
+		delta = d - (FR_D90_R16 + FR_D180_R16);                 /* distance from 270° */
+		if (delta >= -DEG_THRESH && delta <= DEG_THRESH) {
+			s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16);
+			return dr;
+		}
+	}
+	return fr_cos_bam(fr_deg_to_bam(deg, radix));           /* general case: BAM table path */
 }
 
-s32 FR_Sin(s16 deg, u16 radix)
+s32 fr_sin_deg(s32 deg, u16 radix)
 {
-	return fr_sin_bam(fr_deg_radix_to_bam(deg, radix));
+	int negate = (deg < 0);                 /* sin is odd: fold to |deg| */
+	s32 mag;
+	if (radix == 0) return fr_sin_bam(FR_DEG2BAM_I(deg));
+	if (negate) deg = -deg;
+	/* Whole-degree inputs on an axis get the exact value, no table. */
+	if ((deg & ((1 << radix) - 1)) == 0) {
+		switch ((deg >> radix) % 360) {
+		case 0: case 180: return 0;
+		case 90:  return negate ? -FR_TRIG_ONE :  FR_TRIG_ONE;
+		case 270: return negate ?  FR_TRIG_ONE : -FR_TRIG_ONE;
+		}
+	}
+	mag = fr_sin_bam(fr_deg_to_bam(deg, radix));
+	return negate ? -mag : mag;
 }
 
-s32 FR_TanI(s16 deg)
+s32 FR_TanI(s32 deg)
 {
-	u16 bam = FR_DEG2BAM(deg);
-	s32 s   = fr_sin_bam(bam);
-	s32 c   = fr_cos_bam(bam);
-	if (c == 0)
-		return (s >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
-	return (s32)(((int64_t)s << FR_TRIG_OUT_PREC) / c);
+	/* Odd multiples of 90° are poles (deg % 180 is ±90, so deg != 0);
+	 * saturate with the sign of the input. */
+	s32 rem = deg % 180;
+	if (rem != 90 && rem != -90) return fr_tan_bam(FR_DEG2BAM_I(deg));
+	return (deg > 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
 }
 
-s32 FR_Tan(s16 deg, u16 radix)
+s32 fr_tan_deg(s32 deg, u16 radix)
 {
-	u16 bam = fr_deg_radix_to_bam(deg, radix);
-	s32 s   = fr_sin_bam(bam);
-	s32 c   = fr_cos_bam(bam);
-	if (c == 0)
-		return (s >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
-	return (s32)(((int64_t)s << FR_TRIG_OUT_PREC) / c);
+	if (radix == 0) return FR_TanI(deg);       /* integer degrees: dedicated path */
+	s32 deg_orig = deg;                        /* kept only to pick the sign at exact poles */
+	/* Normalize to [0, 360°) at caller radix */
+	s32 d360 = 360 << radix;
+	if (deg < 0) {
+		deg += ((-deg) / d360) * d360;         /* add back whole turns → (-360°, 0] */
+		if (deg < 0) deg += d360;
+	}
+	if (deg >= d360) {
+		deg -= (deg / d360) * d360;
+	}
+	/* Exact cardinal angles (whole-degree multiples of 90°) */
+	s32 frac_mask = (1 << radix) - 1;
+	if ((deg & frac_mask) == 0) {
+		s32 ideg = deg >> radix;
+		if (ideg == 0 || ideg == 180) return 0;
+		if (ideg == 90 || ideg == 270)
+			return (deg_orig >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;   /* pole: sign follows the original input */
+	}
+	/* Near 0° or 180° (tan=0 crossings): tan(δ) ≈ δ in radians */
+	s32 d = normalize_to_r16(deg, radix);
+	{
+		const s32 DEG_THRESH = 14000; /* ~0.21° at r16 */
+		s32 delta;
+		/* Near 0°: convert with 8 extra bits, then round back down */
+		if (d < DEG_THRESH) {
+			s32 up = d << 8;
+			return (FR_DEG2RAD(up) + (1 << 7)) >> 8;
+		}
+		/* Near 180°: tan(180° + δ) = tan(δ) */
+		delta = d - FR_D180_R16;
+		if (delta >= -DEG_THRESH && delta <= DEG_THRESH) {
+			s32 up = delta << 8;
+			return (FR_DEG2RAD(up) + (1 << 7)) >> 8;
+		}
+		/* Near 360°: tan(360° - δ) = -tan(δ) */
+		delta = FR_D360_R16 - d;
+		if (delta >= 0 && delta < DEG_THRESH) {
+			s32 up = delta << 8;
+			return -((FR_DEG2RAD(up) + (1 << 7)) >> 8);
+		}
+	}
+	/* Main path: convert to u16 BAM, table lookup */
+	u16 bam = fr_deg_to_bam(deg, radix);
+	s32 v = fr_tan_bam(bam);
+	/* Near-pole BAM alias: a non-pole angle rounded onto 90°/270°; sign from which side of the pole d lies */
+	if (bam == 0x4000u || bam == 0xC000u) {
+		s32 pole_d = (bam == 0x4000u) ? FR_D90_R16 : (FR_D90_R16 + FR_D180_R16);
+		v = (d < pole_d) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+	}
+	return v;
 }
+#endif /* FR_LEAN */
 
 /*=======================================================
  * FR_FixMuls (x*y signed, NOT saturated, round-to-nearest)
@@ -250,11 +732,9 @@ s32 FR_FixAddSat(s32 x, s32 y)
 /* FR_acos — returns radians at out_radix.
  * Range: [0, pi].  Input is a cosine value at the given radix.
  *
- * Uses the same 129-entry cosine table as fr_cos_bam, but in reverse:
- * binary-search to find the bracketing pair, then linear-interpolate
- * the fractional position between them to recover the full 14-bit
- * in-quadrant BAM.  This mirrors the forward path and gives matching
- * precision (~1 LSB of s15.16 output).
+ * Uses the 129-entry sine table in reverse: binary-search the ascending
+ * table to find asin(|input|), then acos = pi/2 - asin (with sign handling
+ * for the second quadrant).
  */
 s32 FR_acos(s32 input, u16 radix, u16 out_radix)
 {
@@ -264,32 +744,24 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix)
 	s32 idx, d, num, frac;
 	s32 input_abs;
 
-	/* Work with absolute value at the caller's radix — we'll need it for
-	 * the sqrt fast path before quantising to r15. */
+	/* Work with absolute value at the caller's radix */
 	sign = (s16)((input < 0) ? 1 : 0);
 	input_abs = sign ? -input : input;
 
-	/* Clamp at the caller's radix — not at r15.  Near ±1.0 the r15
-	 * quantisation can round to 32767 even when the caller has sub-LSB
-	 * precision that the sqrt fast path can use. */
+	/* Clamp at the caller's radix */
 	{
 		s32 one = (s32)1 << radix;
 		if (input_abs >= one)
-			return sign ? FR_BAM2RAD(FR_BAM_HALF, out_radix) : 0;
+			return sign ? FR_CHRDX(FR_kPI, FR_kPREC, out_radix) : 0;
 	}
 
 	v = FR_CHRDX(input_abs, radix, FR_TRIG_PREC); /* |input| at s0.15 */
 
-	/* Small-angle fast path: when cos(θ) is close to 1.0, the table
-	 * has only 2-8 LSBs of gap per entry, so linear interpolation is
-	 * very coarse.  Use the identity  acos(x) ≈ sqrt(2*(1-x)).
-	 *
-	 * Key: compute 1-x at the CALLER's radix, not r15.  Near ±1.0 the
-	 * r15 quantisation crushes many distinct inputs to the same value
-	 * (cos(179.5°)..cos(179.9°) all round to 32767 at r15).  The
-	 * caller's higher-radix bits carry the angular information via the
-	 * identity sin(θ) = sqrt(2(1-cos θ)) — effectively the sin trick. */
-	if (v > gFR_COS_TAB_Q[7])
+	/* Small-angle fast path: when cos(θ) is close to 1.0, the sine table
+	 * has poor resolution near the top (entries close together).
+	 * Use acos(x) ≈ sqrt(2*(1-x)) instead. Threshold: v > sin_tab[121]
+	 * means |input| > sin(121*π/256) = cos(7*π/256) ≈ 0.9963. */
+	if (v > gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE - 8])
 	{
 		s32 one = (s32)1 << radix;
 		s32 one_minus_x = one - input_abs;           /* 1-|x| at caller radix */
@@ -297,39 +769,31 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix)
 		s32 rad_native  = FR_sqrt(two_omx, radix);   /* radians at caller radix */
 		s32 rad_out     = FR_CHRDX(rad_native, radix, out_radix);
 		if (sign)
-			rad_out = FR_BAM2RAD(FR_BAM_HALF, out_radix) - rad_out;
+			rad_out = FR_CHRDX(FR_kPI, FR_kPREC, out_radix) - rad_out;
 		return rad_out;
 	}
 
-	/* Below this point we need the sign-stripped r15 value for the
-	 * binary search.  (v was already computed from input_abs above.) */
-
-	/* Binary search on the cosine quadrant table.  The table is
-	 * monotonically decreasing: gFR_COS_TAB_Q[0] = 32767 (cos 0°),
-	 * gFR_COS_TAB_Q[128] = 0 (cos 90°).
+	/* Binary search on the ascending sine table.
+	 * gFR_SIN_TAB_Q[0] = 0 (sin 0°), gFR_SIN_TAB_Q[128] = 32768 (sin 90°).
 	 *
-	 * After the search, lo is the first index where table[lo] <= v,
-	 * so the bracketing pair is (lo-1, lo) with table[lo-1] >= v >= table[lo].
-	 */
+	 * Find the first index where table[idx] >= v. */
 	lo = 0;
 	hi = FR_TRIG_TABLE_SIZE;
 	while (lo < hi)
 	{
 		mid = (lo + hi) >> 1;
-		if (gFR_COS_TAB_Q[mid] > v)
+		if ((s32)gFR_SIN_TAB_Q[mid] < v)
 			lo = mid + 1;
 		else
 			hi = mid;
 	}
 
-	/* lo is now the index where table[lo] <= v.  The bracketing interval
-	 * is [lo-1, lo] (table decreasing).  Clamp idx to valid range.
-	 */
+	/* lo is now the first index where table[lo] >= v.
+	 * The bracketing interval is [lo-1, lo] with table[lo-1] < v <= table[lo].
+	 * This gives us the asin angle; acos = pi/2 - asin. */
 	idx = lo;
 	if (idx <= 0)
 	{
-		/* v >= table[0] = 32767 — essentially cos(0), already clamped above
-		 * but guard anyway. */
 		idx = 0;
 		frac = 0;
 	}
@@ -340,29 +804,27 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix)
 	}
 	else
 	{
-		/* Linear interpolate between table[idx-1] and table[idx].
-		 * d = table[idx-1] - table[idx]  (>= 0, cos decreasing)
-		 * num = table[idx-1] - v          (how far past table[idx-1])
-		 * frac = (num << FR_TRIG_FRAC_BITS) / d, in [0, FR_TRIG_FRAC_MAX)
-		 *
-		 * num and d are both in [0, 32767], so num << 7 fits in 22 bits.
+		/* Interpolate between table[idx-1] and table[idx].
+		 * d = table[idx] - table[idx-1]  (>= 0, sin increasing)
+		 * num = v - table[idx-1]          (how far past table[idx-1])
 		 */
-		d   = gFR_COS_TAB_Q[idx - 1] - gFR_COS_TAB_Q[idx];
-		num = gFR_COS_TAB_Q[idx - 1] - v;
+		d   = (s32)gFR_SIN_TAB_Q[idx] - (s32)gFR_SIN_TAB_Q[idx - 1];
+		num = v - (s32)gFR_SIN_TAB_Q[idx - 1];
 		if (d > 0)
 			frac = ((num << FR_TRIG_FRAC_BITS) + (d >> 1)) / d;
 		else
 			frac = 0;
-		/* Reconstruct: the angle is at index (idx-1) + frac/FRAC_MAX,
-		 * so shift idx back by 1 for the BAM calculation below. */
 		idx = idx - 1;
 	}
 
 	{
-		u16 bam = (u16)(((u32)idx << FR_TRIG_FRAC_BITS) + (u32)frac);
+		/* asin_bam is the angle in first-quadrant BAM whose sin = v */
+		u16 asin_bam = (u16)(((u32)idx << FR_TRIG_FRAC_BITS) + (u32)frac);
+		/* acos = pi/2 - asin (in BAM: quadrant - asin_bam) */
+		u16 bam = (u16)(FR_TRIG_QUADRANT - asin_bam);
 		if (sign)
 			bam = (u16)(FR_BAM_HALF - bam);  /* mirror: pi - angle */
-		return FR_BAM2RAD(bam, out_radix);
+		return FR_CHRDX(FR_Q2RAD(bam), 14, out_radix);
 	}
 }
 
@@ -370,7 +832,7 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix)
 s32 FR_asin(s32 input, u16 radix, u16 out_radix)
 {
 	/* asin(x) = pi/2 - acos(x) */
-	s32 half_pi = FR_BAM2RAD(FR_BAM_QUADRANT, out_radix);
+	s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix);
 	return half_pi - FR_acos(input, radix, out_radix);
 }
 
@@ -394,12 +856,12 @@ s32 FR_atan2(s32 y, s32 x, u16 out_radix)
 	/* Axis cases — exact angles, no divide. */
 	if (x == 0)
 	{
-		if (y > 0) return  FR_BAM2RAD(FR_BAM_QUADRANT, out_radix);     /*  pi/2 */
-		if (y < 0) return -FR_BAM2RAD(FR_BAM_QUADRANT, out_radix);     /* -pi/2 */
+		if (y > 0) return  FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix);     /*  pi/2 */
+		if (y < 0) return -FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix);     /* -pi/2 */
 		return 0;
 	}
 	if (y == 0)
-		return (x > 0) ? 0 : FR_BAM2RAD(FR_BAM_HALF, out_radix);      /* 0 or pi */
+		return (x > 0) ? 0 : FR_CHRDX(FR_kPI, FR_kPREC, out_radix);      /* 0 or pi */
 
 	ax = (x < 0) ? -x : x;
 	ay = (y < 0) ? -y : y;
@@ -443,7 +905,7 @@ s32 FR_atan2(s32 y, s32 x, u16 out_radix)
 		if (cos_val < FR_ATAN2_SMALL)
 		{
 			/* angle ≈ pi/2 - cos_val (symmetric small-angle identity) */
-			s32 half_pi = FR_BAM2RAD(FR_BAM_QUADRANT, out_radix);
+			s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix);
 			q1_angle = half_pi - FR_CHRDX(cos_val, FR_TRIG_PREC, out_radix);
 		}
 		else
@@ -453,7 +915,7 @@ s32 FR_atan2(s32 y, s32 x, u16 out_radix)
 	/* Apply quadrant from signs of x and y.
 	 * q1_angle is always positive [0..pi/2]. */
 	{
-		s32 pi = FR_BAM2RAD(FR_BAM_HALF, out_radix);
+		s32 pi = FR_CHRDX(FR_kPI, FR_kPREC, out_radix);
 		if (x > 0)
 			return (y > 0) ? q1_angle : -q1_angle;
 		/* x < 0: mirror across y-axis */
@@ -658,11 +1120,13 @@ s32 FR_ln(s32 input, u16 radix, u16 output_radix)
 	return FR_MULK28(r, FR_krLOG2E_28);
 }
 
+#ifndef FR_LEAN
 s32 FR_log10(s32 input, u16 radix, u16 output_radix)
 {
 	s32 r = FR_log2(input, radix, output_radix);
 	return FR_MULK28(r, FR_krLOG2_10_28);
 }
+#endif
 
 #ifndef FR_NO_PRINT
 /***************************************
@@ -1016,6 +1480,7 @@ s32 FR_sqrt(s32 input, u16 radix)
  *
  * Side effects: none. Pure function.
  */
+#ifndef FR_LEAN
 s32 FR_hypot(s32 x, s32 y, u16 radix)
 {
 	uint64_t xx = (uint64_t)((int64_t)x * (int64_t)x);
@@ -1023,6 +1488,7 @@ s32 FR_hypot(s32 x, s32 y, u16 radix)
 	(void)radix; /* the 2*radix in xx+yy cancels with isqrt's halving */
 	return (s32)fr_isqrt64(xx + yy);
 }
+#endif
 
 /*=======================================================
  * FR_hypot_fast8 — 8-segment piecewise-linear magnitude approximation.
diff --git a/src/FR_math.h b/src/FR_math.h
index 6eff284..a2db262 100644
--- a/src/FR_math.h
+++ b/src/FR_math.h
@@ -32,14 +32,18 @@
 #ifndef __FR_Math_h__
 #define __FR_Math_h__
 
-#define FR_MATH_VERSION     "2.0.7"
-#define FR_MATH_VERSION_HEX  0x020007  /* major << 16 | minor << 8 | patch */
+#define FR_MATH_VERSION     "2.0.8"
+#define FR_MATH_VERSION_HEX  0x020008  /* major << 16 | minor << 8 | patch */
 
 #ifdef FR_CORE_ONLY
 #define FR_NO_PRINT
 #define FR_NO_WAVES
 #endif
 
+#ifdef FR_LEAN
+#define FR_NO_WAVES
+#endif
+
 #ifdef __cplusplus
 extern "C"
 {
@@ -49,21 +53,14 @@ extern "C"
 #include "FR_defs.h"
 #endif
 
-/* Quick Note on MACRO param wrapping:
- * All macro inputs are wrapped in paranthesis in this code.
- * eg: #define MACRO_X_SQUARED(x)  ((x)*(x)) //<<-- note internal paranthesis
- * this is done because macros use true source substitution in C/C++ so a if
- * a macro internally uses many operators of mixed precedence e.g. >> and * together
- * undesired behavior can result if the parameter "passed" in the the macro is a
- * a complex contruct e.g. instead of being a value or single variable is a
- * something like 3+4*5  --> all of this would gets substituted in to the MACRO
- * expression and parans eliminate chances for odd behavior.
- * For example:
- * MACRO_X_SQUARED_BAD(x) (x*x)
- * will expand this way:
- * 3+4*5*3+4*5 ==> 3+60+20 == 83 // due to precedence operations whereas
- * MACRO_X_SQUARED(x) ((x)*(x))
- * (3+4*5)*(3+4*5) ==> (3+20)*(3+20) == (23)*(23) == 529
+/* Quick note on macro parameter wrapping:
+ * Arguments are parenthesized in expansions, e.g.
+ *   #define MACRO_X_SQUARED(x)  ((x)*(x))   // inner parens around each x
+ * Macros substitute text as-is. If a parameter is an expression like 3+4*5
+ * and the body mixes operators without extra parentheses, precedence errors
+ * follow. Parenthesize parameters (and fragile subexpressions) in the macro body.
+ * Example: MACRO_X_SQUARED_BAD(x)  (x*x)  ->  3+4*5*3+4*5  == 83 (wrong).
+ *          MACRO_X_SQUARED(x)  ((x)*(x))  ->  (3+4*5)*(3+4*5)  == 529 (right).
  */
 
 /*absolute value for integer and fixed radix types*/
@@ -258,7 +255,7 @@ static inline s32 FR_div_rnd(s64 num, s32 den) {
 /*================================================
  * Constants used in Trig tables, definitions
  *
- * FR_TRIG_PREC     — internal table precision (s0.15, kept for table indexing)
+ * FR_TRIG_PREC     — internal table precision (u0.15, sine table)
  * FR_TRIG_OUT_PREC — output precision of sin/cos/tan (s15.16 since v2.0.1)
  * FR_TRIG_ONE      — exact 1.0 in output format (1 << 16 = 65536)
  *
@@ -270,8 +267,8 @@ static inline s32 FR_div_rnd(s64 num, s32 den) {
 #define FR_TRIG_OUT_PREC (16)
 #define FR_TRIG_MASK     ((1 << (FR_TRIG_PREC)) - 1)
 #define FR_TRIG_ONE      (1L << FR_TRIG_OUT_PREC)         /* 65536 = 1.0 */
-#define FR_TRIG_MAXVAL   ((s32)0x7fffffff)                 /* tan saturation */
-#define FR_TRIG_MINVAL (-FR_TRIG_MASK)
+#define FR_TRIG_MAXVAL   ((s32)0x7fffffff)                 /* tan saturation max */
+#define FR_TRIG_MINVAL   (-FR_TRIG_MAXVAL)                  /* tan saturation min */
 
 /* Bit Shift Scaling macros.  Useful on some platforms with poor MUL performance.
  * Also can be useful if you need to scale numbers with
@@ -304,32 +301,85 @@ static inline s32 FR_div_rnd(s64 num, s32 den) {
 /* scale by log2(10)   3.32192809489 used for converting pow2() to pow10 */
 #define FR_SLOG2_10(x) (((x) << 1) + (x) + ((x) >> 2) + ((x) >> 4) + ((x) >> 7) + ((x) >> 10) + ((x) >> 11) + ((x) >> 13))
 
-/* TRIG Conversion macros
- * Convert degrees <--> radians <--> quadrants <--> degrees
- * no multiply (may reduce chances of overflow in certain circumstances)
- * works on all int types and radixes (pure ints will have trunc err)
- * radians   = 2*pi per revolution
- * degrees   = 360  per revolution
- * quadrants = 4 per revolution
- * freq      = 1 per revolution
- */
-/* FR_DEG2RAD(x): multiply by pi/180 ≈ 0.017453 using shifts only.
- * Worst-case relative error: ~1.6e-4 (acceptable for embedded use; if you
- * need better precision, multiply by FR_kDEG2RAD and shift down by FR_kPREC).
- * Side-effect note: x is referenced 3 times, so do not pass an expression
- * with side effects.
+/* Shift-only angular conversion macros
+ *
+ * All are pure constant multipliers expressed as shifts — no multiply, no
+ * divide, no 64-bit intermediates, no accumulators. Work at any radix: if
+ * your input is degrees at radix 8, the output is the target unit at radix 8.
+ * The caller shifts as needed.
+ *
+ * Angular units:
+ *   degrees   = 360  per revolution
+ *   radians   = 2*pi per revolution
+ *   BAM       = 65536 per revolution (Binary Angular Measure, u16)
+ *   quadrants = 4 per revolution (= BAM >> 14)
+ *
+ * Side-effect note: x is referenced multiple times in each macro — do not
+ * pass expressions with side effects.
  */
-#define FR_DEG2RAD(x) (((x) >> 6) + ((x) >> 9) - ((x) >> 13))
 
-/* FR_RAD2DEG(x): multiply by 180/pi ≈ 57.295780 using shifts only.
- * Worst-case relative error: ~2.1e-6.
- * Side-effect note: x is referenced 7 times.
- */
+/* FR_DEG2RAD(x): multiply by pi/180 ≈ 0.017453 (5 terms, ~17 bits) */
+#define FR_DEG2RAD(x) (((x) >> 6) + ((x) >> 9) - ((x) >> 13) - ((x) >> 19) - ((x) >> 20))
+
+/* FR_RAD2DEG(x): multiply by 180/pi ≈ 57.29578 (7 terms, ~19 bits) */
 #define FR_RAD2DEG(x) (((x) << 6) - ((x) << 3) + (x) + ((x) >> 2) + (((x) >> 4) - ((x) >> 6)) - ((x) >> 10))
 
+/* FR_DEG2BAM(x): multiply by 65536/360 ≈ 182.0449 (7 terms, ~18 bits).
+ * Intermediate terms overflow s32 when |x| > ~256 deg at s15.16 (x<<7 term).
+ * Signed overflow is formally undefined in C; on two's-complement targets that
+ * wrap, truncating the result to u16 BAM still gives the correct modular angle.
+ * For overflow-safe conversion over the full ±360° range, use fr_deg_to_bam(). */
+#define FR_DEG2BAM(x) (((x)<<7)+((x)<<6)-((x)<<3)-((x)<<1)+((x)>>5)+((x)>>6)-((x)>>9))
+
+/* FR_BAM2DEG(x): multiply by 360/65536 = 0.00549316 (4 terms, exact) */
+#define FR_BAM2DEG(x) (((x)>>8)+((x)>>9)-((x)>>12)-((x)>>13))
+
+/* FR_RAD2BAM(x): multiply by 65536/(2*pi) ≈ 10430.378 (10 terms, ~30 bits).
+ * CAUTION: overflows s32 when |x| > ~4 rad at s15.16 (x<<13 term).
+ * For safe conversion at any radix, use fr_rad_to_bam() instead.
+ * Shorter 7-term variant (~21 bits), kept for reference: (((x)<<13)+((x)<<11)+((x)<<7)+((x)<<6)-((x)<<1)+((x)>>1)-((x)>>3)) */
+#define FR_RAD2BAM(x) (((x)<<13)+((x)<<11)+((x)<<7)+((x)<<6)-((x)<<1)+((x)>>1)-((x)>>3)+((x)>>8)-((x)>>11)-((x)>>14))
+/* ── Overflow-safe rad/deg to BAM conversion functions ─────────────
+ *
+ * These replace the FR_RAD2BAM / FR_DEG2BAM macros for callers that
+ * need the full ±2*pi or ±360° range at any radix.
+ *
+ * Strategy: normalize input to radix 16, conditionally reduce into
+ * a safe zone, apply the full-precision shift-only multiply, then
+ * extract the u16 BAM.  No precision loss from halving/quartering.
+ *
+ * fr_rad_to_bam: reduce to [-pi, pi], reordered terms.  ±2*pi safe.
+ * fr_deg_to_bam: reduce to [-90, 90) + quadrant offset.  ±360° safe.
+ */
+
+/* Pi constants at any radix: FR_PI(r) = round(pi * 2^r), etc.
+ * Compiler evaluates at compile time when r is a constant.
+ * Max safe radix: FR_PI r<=29, FR_TWO_PI r<=28, FR_HALF_PI r<=30, FR_THREE_HALF_PI r<=28. */
+#define FR_PI(r)             ((s32)(3.14159265358979323846  * (1LL << (r)) + 0.5))
+#define FR_TWO_PI(r)         ((s32)(6.28318530717958647692  * (1LL << (r)) + 0.5))
+#define FR_HALF_PI(r)        ((s32)(1.57079632679489661923  * (1LL << (r)) + 0.5))
+#define FR_THREE_HALF_PI(r)  ((s32)(4.71238898038468985769  * (1LL << (r)) + 0.5))
+
+/* Convenience aliases at radix 16 */
+#define FR_PI_R16       FR_PI(16)
+#define FR_TWO_PI_R16   FR_TWO_PI(16)
+
+/* Degree constants at radix 16 (exact — no truncation) */
+#define FR_D90_R16      ((s32)90  << 16)
+#define FR_D180_R16     ((s32)180 << 16)
+#define FR_D360_R16     ((s32)360 << 16)
+
+  u16 fr_rad_to_bam(s32 rad, u16 radix);
+#ifndef FR_LEAN
+  u16 fr_deg_to_bam(s32 deg, u16 radix);
+#endif
+
+/* FR_BAM2RAD(x): multiply by 2*pi/65536 ≈ 0.0000959 (5 terms, ~18 bits) */
+#define FR_BAM2RAD(x) (((x)>>13)-((x)>>15)+((x)>>18)+((x)>>21)+((x)>>25))
+
+/* Legacy quadrant macros (quadrants = BAM >> 14) */
 #define FR_RAD2Q(x) (((x) >> 1) + ((x) >> 3) + ((x) >> 7) + ((x) >> 8) - ((x) >> 14))
 #define FR_Q2RAD(x) ((x) + ((x) >> 1) + ((x) >> 4) + ((x) >> 7) + ((x) >> 11))
-
 #define FR_DEG2Q(x) (((x) >> 6) - ((x) >> 8) - ((x) >> 11) - ((x) >> 13))
 #define FR_Q2DEG(x) (((x) << 6) + ((x) << 4) + ((x) << 3) + ((x) << 1))
 
@@ -347,44 +397,12 @@ static inline s32 FR_div_rnd(s64 num, s32 den) {
  *   - The top 2 bits select the quadrant (no `% 360` modulo needed).
  *   - The next 7 bits index the 128-entry quadrant table directly.
  *   - The bottom 7 bits give linear-interpolation precision.
- *
- * All BAM macros are *macros* (not functions) so they evaluate inline and
- * cost nothing if you don't call them. Side-effect note: each macro
- * references its argument multiple times — do not pass an expression with
- * side effects.
  */
 #define FR_BAM_BITS         (16)
 #define FR_BAM_FULL         (1L << FR_BAM_BITS)         /* 65536 */
 #define FR_BAM_QUADRANT     (FR_BAM_FULL >> 2)          /* 16384 */
 #define FR_BAM_HALF         (FR_BAM_FULL >> 1)          /* 32768 */
 
-/* Convert degrees -> BAM. Exact formula: deg * 65536 / 360.
- * Computed in s32; for s16-range deg the intermediate (deg << 16) fits.
- * The cast to u16 wraps modulo full circle, which is mathematically correct.
- * Side-effect note: deg is referenced twice for sign-aware rounding.
- *
- * Worst-case error: <= 0.5 LSB BAM (~0.0028 deg) per degree. No accumulation
- * across full circles.
- */
-#define FR_DEG2BAM(deg)     ((u16)((((s32)(deg) << 16) + ((deg) >= 0 ? 180 : -180)) / 360))
-
-/* Convert BAM -> degrees. bam * (360 / 65536) ≈ bam * (45/8192).
- * Truncated; result is integer degrees.
- */
-#define FR_BAM2DEG(bam)     ((s16)(((s32)(u16)(bam) * 45) >> 13))
-
-/* Convert radians (at given radix) -> BAM. rad * (65536 / (2*pi)) ≈ rad * 10430.378
- * For radix-16 input: ((rad * 10430) >> 16). Approximated; for high accuracy
- * combine with FR_kRAD2Q multiplier.
- */
-#define FR_RAD2BAM(rad, radix)  ((u16)(((s32)(rad) * 10430L) >> (radix)))
-
-/* Convert BAM -> radians at the requested output radix.
- * Derivation: rad = bam * 2π / 65536. At output radix r: bam * 2π * 2^r / 2^16
- *           = bam * (2π * 2^10) / 2^(26 - r) = bam * 6434 >> (26 - r).
- */
-#define FR_BAM2RAD(bam, radix)  ((s32)(((s32)(u16)(bam) * 6434L) >> (26 - (radix))))
-
 /*===============================================
  * Radian-native and BAM-native trig (recommended)
  *
@@ -397,39 +415,59 @@ static inline s32 FR_div_rnd(s64 num, s32 den) {
  *   fr_cos(rad, radix)      — cos of radians at radix,   s15.16 result
  *   fr_sin(rad, radix)      — sin of radians at radix,   s15.16 result
  *   fr_tan(rad, radix)      — tan of radians at radix,   s15.16 result
- *   fr_cos_deg(deg)         — cos of integer degrees,    s15.16 result
- *   fr_sin_deg(deg)         — sin of integer degrees,    s15.16 result
+ *   fr_cos_deg(deg, radix)  — cos of fixed-radix degrees, s15.16 result
+ *   fr_sin_deg(deg, radix)  — sin of fixed-radix degrees, s15.16 result
+ *   fr_tan_deg(deg, radix)  — tan of fixed-radix degrees, s15.16 result
  *
  * All go through the same 129-entry quadrant table with linear interpolation.
  * Worst-case error: ~2 LSB in s15.16 (~3e-5 absolute), except at the four
  * cardinal angles where the result is exact.
+ *
+ * The radian and degree wrappers (fr_sin, fr_cos, fr_tan, etc.) range-reduce
+ * their input, convert to u16 BAM, and call the BAM-native functions. Small-
+ * angle bypasses at the zero crossings (sin≈0, cos≈0, tan≈0) use the linear
+ * approximation sin(δ)≈δ to avoid BAM quantization error where it matters most.
  */
   s32 fr_cos_bam(u16 bam);
   s32 fr_sin_bam(u16 bam);
+#ifndef FR_LEAN
+  s32 fr_tan_bam(u16 bam);
+#endif
   s32 fr_cos(s32 rad, u16 radix);
   s32 fr_sin(s32 rad, u16 radix);
   s32 fr_tan(s32 rad, u16 radix);
 
-#define fr_cos_deg(deg)  fr_cos_bam(FR_DEG2BAM(deg))
-#define fr_sin_deg(deg)  fr_sin_bam(FR_DEG2BAM(deg))
+/* Integer degrees -> BAM using division (exact at all multiples of 45 deg). */
+#define FR_DEG2BAM_I(deg) ((u16)((((s32)(deg) << 16) + ((deg) >= 0 ? 180 : -180)) / 360))
 
+/* Legacy single-arg integer-degree macros — use FR_CosI / FR_SinI instead */
+/* #define fr_cos_deg(deg)  fr_cos_bam(FR_DEG2BAM_I(deg)) — removed, name reused for 2-arg function */
+/* #define fr_sin_deg(deg)  fr_sin_bam(FR_DEG2BAM_I(deg)) — removed, name reused for 2-arg function */
+
+#ifndef FR_LEAN
 /*===============================================
- * Integer-degree trig API (thin wrappers over the BAM-native path)
- *
- *   FR_CosI(deg)            — cos of integer degrees,       s15.16 result
- *   FR_SinI(deg)            — sin of integer degrees,       s15.16 result
- *   FR_TanI(deg)            — tan of integer degrees,       s15.16 result
- *   FR_Cos(deg, radix)      — cos of fixed-radix degrees,   s15.16 result
- *   FR_Sin(deg, radix)      — sin of fixed-radix degrees,   s15.16 result
- *   FR_Tan(deg, radix)      — tan of fixed-radix degrees,   s15.16 result
+ * Degree-input trig API
+ *
+ *   FR_CosI(deg)              — cos of integer degrees,       s15.16 result
+ *   FR_SinI(deg)              — sin of integer degrees,       s15.16 result
+ *   FR_TanI(deg)              — tan of integer degrees,       s15.16 result
+ *   fr_cos_deg(deg, radix)    — cos of fixed-radix degrees,   s15.16 result
+ *   fr_sin_deg(deg, radix)    — sin of fixed-radix degrees,   s15.16 result
+ *   fr_tan_deg(deg, radix)    — tan of fixed-radix degrees,   s15.16 result
  */
-#define FR_CosI(deg)  fr_cos_bam(FR_DEG2BAM(deg))
-#define FR_SinI(deg)  fr_sin_bam(FR_DEG2BAM(deg))
+#define FR_CosI(deg)  fr_cos_bam(FR_DEG2BAM_I(deg))
+#define FR_SinI(deg)  fr_sin_bam(FR_DEG2BAM_I(deg))
+
+  s32 fr_cos_deg(s32 deg, u16 radix);
+  s32 fr_sin_deg(s32 deg, u16 radix);
+  s32 FR_TanI(s32 deg);
+  s32 fr_tan_deg(s32 deg, u16 radix);
 
-  s32 FR_Cos(s16 deg, u16 radix);
-  s32 FR_Sin(s16 deg, u16 radix);
-  s32 FR_TanI(s16 deg);
-  s32 FR_Tan(s16 deg, u16 radix);
+  /* Legacy macros — use fr_sin_deg/fr_cos_deg/fr_tan_deg in new code */
+  #define FR_Sin  fr_sin_deg
+  #define FR_Cos  fr_cos_deg
+  #define FR_Tan  fr_tan_deg
+#endif /* FR_LEAN */
 
   /* Inverse trig — output in radians at caller-specified radix (s32).
    * FR_atan2 returns radians at radix 16 (s15.16).
@@ -446,7 +484,9 @@ static inline s32 FR_div_rnd(s64 num, s32 den) {
 
   s32 FR_log2(s32 input, u16 radix, u16 output_radix);
   s32 FR_ln(s32 input, u16 radix, u16 output_radix);
+#ifndef FR_LEAN
   s32 FR_log10(s32 input, u16 radix, u16 output_radix);
+#endif
 
   /* Power */
   s32 FR_pow2(s32 input, u16 radix);
@@ -494,7 +534,9 @@ static inline s32 FR_div_rnd(s64 num, s32 den) {
  * can check `result == FR_DOMAIN_ERROR` to detect domain errors.
  */
   s32 FR_sqrt(s32 input, u16 radix);
+#ifndef FR_LEAN
   s32 FR_hypot(s32 x, s32 y, u16 radix);
+#endif
 
   /* Fast approximate magnitude — shift-only, no multiply, no 64-bit.
    * Based on piecewise-linear approximation of sqrt(x*x + y*y).
diff --git a/src/FR_math_2D.cpp b/src/FR_math_2D.cpp
index c9025b3..b45ca75 100644
--- a/src/FR_math_2D.cpp
+++ b/src/FR_math_2D.cpp
@@ -5,7 +5,7 @@
  *
  *	@copy Copyright (C) <2001-2026>  <M. A. Chatterjee>
  *  @author M A Chatterjee <deftio [at] deftio [dot] com>
- *	@version 2.0.7 M. A. Chatterjee, cleaned up naming
+ *	@version 2.0.8 M. A. Chatterjee, cleaned up naming
  *
  *  This file contains integer math settable fixed point radix math routines for
  *  use on systems in which floating point is not desired or unavailable.
diff --git a/src/FR_math_2D.h b/src/FR_math_2D.h
index 8f16330..3eaf7d3 100644
--- a/src/FR_math_2D.h
+++ b/src/FR_math_2D.h
@@ -3,7 +3,7 @@
  *
  *	@copy Copyright (C) <2001-2026>  <M. A. Chatterjee>
  *  @author M A Chatterjee <deftio [at] deftio [dot] com>
- *	@version 2.0.7 M. A. Chatterjee, cleaned up naming
+ *	@version 2.0.8 M. A. Chatterjee, cleaned up naming
  *
  *  This file contains integer math settable fixed point radix math routines for
  *  use on systems in which floating point is not desired or unavailable.
diff --git a/src/FR_trig_table.h b/src/FR_trig_table.h
deleted file mode 100644
index 03a34cd..0000000
--- a/src/FR_trig_table.h
+++ /dev/null
@@ -1,71 +0,0 @@
-/**
- *  @file FR_trig_table.h - 129-entry quadrant cosine table for FR_Math 2.0
- *
- *  This table covers one quadrant [0, pi/2] inclusive in 128 intervals (so
- *  129 entries). Indexed by a 7-bit BAM (binary angular measure) sub-index.
- *  Used by fr_cos_bam / fr_sin_bam in FR_math.c.
- *
- *  Output format: s0.15 (signed, 15 fractional bits). So
- *      gFR_COS_TAB_Q[0]   = round(cos(0) * 32767)        = 32767
- *      gFR_COS_TAB_Q[64]  = round(cos(pi/4) * 32767)     ~ 23170
- *      gFR_COS_TAB_Q[128] = round(cos(pi/2) * 32767)     = 0
- *
- *  Generated by tools/coef-gen.py — do not hand-edit.
- *
- *  @copy Copyright (C) <2001-2026>  <M. A. Chatterjee>
- *  @author M A Chatterjee <deftio [at] deftio [dot] com>
- *
- *  Same zlib license as the rest of the library.
- */
-#ifndef __FR_TRIG_TABLE_H__
-#define __FR_TRIG_TABLE_H__
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define FR_TRIG_TABLE_BITS  (7)                          /* log2(intervals) */
-#define FR_TRIG_TABLE_SIZE  ((1 << FR_TRIG_TABLE_BITS) + 1) /* entries = intervals + 1 */
-
-/* Derived constants for fr_cos_bam / fr_sin_bam.
- *
- * The BAM has 16 bits total: 2 top bits for quadrant, 14 bits in-quadrant.
- * The in-quadrant value is split into (FR_TRIG_TABLE_BITS) table-index bits
- * and (FR_TRIG_FRAC_BITS) interpolation-fraction bits, so
- *     FR_TRIG_TABLE_BITS + FR_TRIG_FRAC_BITS = 14.
- *
- * Changing FR_TRIG_TABLE_BITS (and regenerating the table with coef-gen.py)
- * is the single knob for ROM-vs-precision trade-off. Every other constant
- * below derives from it automatically.
- */
-#define FR_TRIG_FRAC_BITS   (14 - FR_TRIG_TABLE_BITS)
-#define FR_TRIG_FRAC_MAX    (1 << FR_TRIG_FRAC_BITS)
-#define FR_TRIG_FRAC_MASK   (FR_TRIG_FRAC_MAX - 1)
-#define FR_TRIG_FRAC_HALF   (FR_TRIG_FRAC_MAX >> 1)   /* rounding bias */
-#define FR_TRIG_QUADRANT    (1 << 14)                 /* in-quadrant span */
-
-static const short gFR_COS_TAB_Q[FR_TRIG_TABLE_SIZE] = {
-    32767, 32765, 32757, 32745, 32728, 32705, 32678, 32646,
-    32609, 32567, 32521, 32469, 32412, 32351, 32285, 32213,
-    32137, 32057, 31971, 31880, 31785, 31685, 31580, 31470,
-    31356, 31237, 31113, 30985, 30852, 30714, 30571, 30424,
-    30273, 30117, 29956, 29791, 29621, 29447, 29268, 29085,
-    28898, 28706, 28510, 28310, 28105, 27896, 27683, 27466,
-    27245, 27019, 26790, 26556, 26319, 26077, 25832, 25582,
-    25329, 25072, 24811, 24547, 24279, 24007, 23731, 23452,
-    23170, 22884, 22594, 22301, 22005, 21705, 21403, 21096,
-    20787, 20475, 20159, 19841, 19519, 19195, 18868, 18537,
-    18204, 17869, 17530, 17189, 16846, 16499, 16151, 15800,
-    15446, 15090, 14732, 14372, 14010, 13645, 13279, 12910,
-    12539, 12167, 11793, 11417, 11039, 10659, 10278,  9896,
-     9512,  9126,  8739,  8351,  7962,  7571,  7179,  6786,
-     6393,  5998,  5602,  5205,  4808,  4410,  4011,  3612,
-     3212,  2811,  2410,  2009,  1608,  1206,   804,   402,
-        0
-};
-
-#ifdef __cplusplus
-} // extern "C"
-#endif
-
-#endif /* __FR_TRIG_TABLE_H__ */
diff --git a/tests/fr_math_test.c b/tests/fr_math_test.c
index 4c095cc..346b840 100644
--- a/tests/fr_math_test.c
+++ b/tests/fr_math_test.c
@@ -93,5 +93,5 @@ int main()
 	else
 		printf("tests failed.\n");
 
-	return result; /* remember the value 0 is considered passing in a travis-ci sense */
+	return result; /* remember the value 0 is considered passing in a ci sense */
 }
\ No newline at end of file
diff --git a/tests/test_full_coverage.c b/tests/test_full_coverage.c
index 0dfd248..36c00f0 100644
--- a/tests/test_full_coverage.c
+++ b/tests/test_full_coverage.c
@@ -188,7 +188,7 @@ int test_div() {
 int test_trig_complete() {
     s16 result;
     s32 result32;
-    
+
     /* Test CosI with all quadrants and edge cases */
     result = FR_CosI(0);
     result = FR_CosI(45);
@@ -199,51 +199,113 @@ int test_trig_complete() {
     result = FR_CosI(270);
     result = FR_CosI(315);
     result = FR_CosI(360);
-    
+
     /* Test angles > 180 to hit the branch */
     result = FR_CosI(200);  /* > 180, will subtract 360 */
     result = FR_CosI(350);  /* > 180, will subtract 360 */
-    
+
     /* Test angles < -180 to hit that branch */
     result = FR_CosI(-200); /* < -180, will add 360 */
     result = FR_CosI(-350); /* < -180, will add 360 */
-    
+
     /* Test SinI */
     result = FR_SinI(0);
     result = FR_SinI(90);
     result = FR_SinI(180);
     result = FR_SinI(270);
-    
+
     /* Test FR_Cos with radix (interpolated) */
     result = FR_Cos(45, 8);
     result = FR_Cos(90, 8);
     result = FR_Cos(180, 8);
-    
+
     /* Test FR_Sin with radix */
     result = FR_Sin(45, 8);
     result = FR_Sin(90, 8);
-    
+
     /* Test TanI with all special cases */
     result32 = FR_TanI(0);
+    if (result32 != 0) return TEST_FAIL;               /* tan(0°) = 0 */
     result32 = FR_TanI(45);
-    result32 = FR_TanI(90);   /* Special case: returns max */
+    if (result32 != 65536) return TEST_FAIL;            /* tan(45°) = 1.0 = 65536 */
+    result32 = FR_TanI(90);
+    if (result32 != FR_TRIG_MAXVAL) return TEST_FAIL;   /* pole: +max */
     result32 = FR_TanI(135);
+    if (result32 != -65536) return TEST_FAIL;           /* tan(135°) = -1.0 */
     result32 = FR_TanI(180);
-    result32 = FR_TanI(270);  /* Special case: returns -max */
-    result32 = FR_TanI(-45);  /* Negative angle */
-    result32 = FR_TanI(-90);  /* Negative 90 */
+    if (result32 != 0) return TEST_FAIL;                /* tan(180°) = 0 */
+    result32 = FR_TanI(270);
+    if (result32 != FR_TRIG_MAXVAL) return TEST_FAIL;   /* pole: +max (positive deg) */
+    result32 = FR_TanI(-45);
+    if (result32 != -65536) return TEST_FAIL;           /* tan(-45°) = -1.0 */
+    result32 = FR_TanI(-90);
+    if (result32 != -FR_TRIG_MAXVAL) return TEST_FAIL;  /* pole: -max */
     result32 = FR_TanI(200);  /* > 180 */
     result32 = FR_TanI(-200); /* < -180 */
-    
+
     /* Test FR_Tan with radix */
     result32 = FR_Tan(45, 8);
     result32 = FR_Tan(30, 8);
-    
+
     (void)result;
     (void)result32;
     return TEST_PASS;
 }
 
+/* Test fr_tan_bam BAM-native tangent */
+int test_tan_bam() {
+    s32 result;
+
+    /* Exact zeros: 0° and 180° */
+    result = fr_tan_bam(0);                     /* 0° */
+    if (result != 0) return TEST_FAIL;
+    result = fr_tan_bam(0x8000);                /* 180° */
+    if (result != 0) return TEST_FAIL;
+
+    /* Exact poles: 90° and 270° */
+    result = fr_tan_bam(0x4000);                /* 90° = +pole */
+    if (result != FR_TRIG_MAXVAL) return TEST_FAIL;
+    result = fr_tan_bam(0xC000);                /* 270° = -pole */
+    if (result != -FR_TRIG_MAXVAL) return TEST_FAIL;
+
+    /* 45° = 0x2000: tan(45°) = 1.0 = 65536 in s15.16 */
+    result = fr_tan_bam(0x2000);
+    if (result != 65536) return TEST_FAIL;
+
+    /* 135° = 0x6000: tan(135°) = -1.0 */
+    result = fr_tan_bam(0x6000);
+    if (result != -65536) return TEST_FAIL;
+
+    /* 225° = 0xA000: tan(225°) = 1.0 (same as 45°) */
+    result = fr_tan_bam(0xA000);
+    if (result != 65536) return TEST_FAIL;
+
+    /* 315° = 0xE000: tan(315°) = -1.0 */
+    result = fr_tan_bam(0xE000);
+    if (result != -65536) return TEST_FAIL;
+
+    /* 30° ≈ BAM 5461: tan(30°) = 1/sqrt(3) ≈ 0.57735 → 37837 in s15.16
+     * Accept 37700..37950 (about -137/+113 LSB) for table interpolation error */
+    result = fr_tan_bam(5461);
+    if (result < 37700 || result > 37950) return TEST_FAIL;
+
+    /* 60° ≈ BAM 10923: tan(60°) = sqrt(3) ≈ 1.73205 → 113512 in s15.16
+     * This exercises the second-octant (reciprocal) path. Accept 113200..113800 (about ±300 LSB). */
+    result = fr_tan_bam(10923);
+    if (result < 113200 || result > 113800) return TEST_FAIL;
+
+    /* Near-pole: 89° ≈ BAM 16202: tan(89°) ≈ 57.29 → huge.
+     * Just verify it's large and positive. */
+    result = fr_tan_bam(16202);
+    if (result < 3000000) return TEST_FAIL;   /* > 45.8 in s15.16 */
+
+    /* Near-pole: 91° ≈ BAM 16566: tan(91°) ≈ -57.29 → large negative */
+    result = fr_tan_bam(16566);
+    if (result > -3000000) return TEST_FAIL;
+
+    return TEST_PASS;
+}
+
 /* Test inverse trig functions */
 int test_inverse_trig() {
     s32 result, input;
@@ -748,8 +810,8 @@ int test_edge_branches() {
      * cos==0 and we hit the saturation return. */
     r32 = FR_Tan(90, 0);                     /* bam=16384 (sin>0) */
     if (r32 != FR_TRIG_MAXVAL) return TEST_FAIL;
-    r32 = FR_Tan(270, 0);                    /* bam=49152 (sin<0) */
-    if (r32 != -FR_TRIG_MAXVAL) return TEST_FAIL;
+    r32 = FR_Tan(270, 0);                    /* pole: positive deg → +MAXVAL */
+    if (r32 != FR_TRIG_MAXVAL) return TEST_FAIL;
 
     /* FR_atan2 now returns radians at out_radix.
      * At radix 16: pi/2 ≈ 102944, pi ≈ 205887.
@@ -1031,6 +1093,7 @@ int main() {
     
     printf("\nTrigonometry (Complete):\n");
     RUN_TEST(test_trig_complete);
+    RUN_TEST(test_tan_bam);
     RUN_TEST(test_inverse_trig);
     
     printf("\nLogarithms & Powers (Complete):\n");
diff --git a/tests/test_tdd.cpp b/tests/test_tdd.cpp
index 5a70a0a..4bff9b2 100644
--- a/tests/test_tdd.cpp
+++ b/tests/test_tdd.cpp
@@ -58,7 +58,7 @@
  * ============================================================ */
 
 static inline double frd(s32 x, int radix) {
-    return (double)x / (double)(1L << radix);
+    return (double)x / ldexp(1.0, radix);
 }
 
 typedef struct {
@@ -73,13 +73,19 @@ typedef struct {
     double worst_pct_input;    /* input that produced max pct error */
     double worst_pct_actual;
     double worst_pct_expected;
+    /* Clamped-denominator relative error: denom = max(|expected|, 1% of full_scale) */
+    double max_pct_err_clamped;
+    double sum_pct_err_clamped;
+    double worst_clamped_input;
+    double worst_clamped_actual;
+    double worst_clamped_expected;
 } stats_t;
 
 static void stats_reset(stats_t *s) {
     memset(s, 0, sizeof(*s));
 }
 
-static void stats_add(stats_t *s, double in, double actual, double expected) {
+static void stats_add(stats_t *s, double in, double actual, double expected, double full_scale) {
     double e = actual - expected;
     if (e < 0) e = -e;
     if (s->n == 0 || e > s->max_abs_err) {
@@ -89,8 +95,7 @@ static void stats_add(stats_t *s, double in, double actual, double expected) {
         s->worst_expected = expected;
     }
     s->sum_abs_err += e;
-    /* Skip percent error when expected ≈ 0 to avoid division artifacts */
-    double pct = (fabs(expected) > 0.01) ? (e / fabs(expected)) * 100.0 : 0.0;
+    double pct = (expected != 0.0) ? (e / fabs(expected)) * 100.0 : (e != 0.0 ? 100.0 : 0.0);
     if (pct > s->max_pct_err) {
         s->max_pct_err = pct;
         s->worst_pct_input = in;
@@ -98,6 +103,17 @@ static void stats_add(stats_t *s, double in, double actual, double expected) {
         s->worst_pct_expected = expected;
     }
     s->sum_pct_err += pct;
+    /* Clamped-denominator relative error: floor = 1% of full_scale */
+    double floor_val = 0.01 * full_scale;
+    double denom = fabs(expected) > floor_val ? fabs(expected) : floor_val;
+    double pct_clamped = (denom > 0.0) ? (e / denom) * 100.0 : 0.0;
+    if (pct_clamped > s->max_pct_err_clamped) {
+        s->max_pct_err_clamped = pct_clamped;
+        s->worst_clamped_input = in;
+        s->worst_clamped_actual = actual;
+        s->worst_clamped_expected = expected;
+    }
+    s->sum_pct_err_clamped += pct_clamped;
     s->n++;
 }
 
@@ -105,8 +121,28 @@ static double stats_mean(const stats_t *s) {
     return s->n ? s->sum_abs_err / s->n : 0.0;
 }
 
-static double stats_mean_pct(const stats_t *s) {
-    return s->n ? s->sum_pct_err / s->n : 0.0;
+static double stats_mean_pct_clamped(const stats_t *s) {
+    return s->n ? s->sum_pct_err_clamped / s->n : 0.0;  /* mean clamped pct error per sample; 0.0 when no samples recorded */
+}
+
+/* Quantize a double to s15.16 resolution: nearest multiple of 1/65536, ties rounded up (same grid as library output). */
+static inline double q16(double x) {
+    return floor(x * 65536.0 + 0.5) / 65536.0;
+}
+
+/* Round-to-nearest float→fixed conversion (not truncation): floor(v * 2^p + 0.5), ties rounded up; no range check before the s32 cast. */
+static inline s32 tofix(double v, int p) {
+    return (s32)floor(ldexp(v, p) + 0.5);
+}
+
+/* Reference value for tan: libm tan() clamped to ±maxint as s15.16 double. */
+static const double TAN_CLAMP = (double)0x7fffffff / 65536.0;  /* 0x7fffffff read as s15.16 (just under 32768.0) */
+
+static double tan_ref(double rad) {  /* rad: angle in radians; returns tan(rad) limited to [-TAN_CLAMP, +TAN_CLAMP] */
+    double t = tan(rad);
+    if (t >  TAN_CLAMP) return TAN_CLAMP;
+    if (t < -TAN_CLAMP) return -TAN_CLAMP;
+    return t;  /* NaN fails both comparisons and is returned unchanged */
 }
 
 /* Set by FR_SHOWPEAK env var — adds a "Peak at" column to the accuracy table */
@@ -115,9 +151,9 @@ static int g_showpeak = 0;
 /* Print one accuracy table row, optionally with peak-error input */
 static void acc_row(const char *name, const stats_t *s, const char *note) {
     printf("| %s | %.4f | %.4f | %s",
-           name, s->max_pct_err, stats_mean_pct(s), note);
+           name, s->max_pct_err_clamped, stats_mean_pct_clamped(s), note);  /* max / mean of the clamped-denominator pct error */
     if (g_showpeak)
-        printf(" | %.4g", s->worst_pct_input);
+        printf(" | %.4g", s->worst_clamped_input);  /* input at which the clamped pct error peaked */
     printf(" |\n");
 }
 
@@ -633,8 +669,8 @@ static void section_arithmetic(void) {
     };
     for (int i = 0; i < (int)(sizeof(div_cases)/sizeof(div_cases[0])); i++) {
         int r = div_cases[i].r;
-        s32 xfp = (s32)(div_cases[i].xd * (1L << r));
-        s32 yfp = (s32)(div_cases[i].yd * (1L << r));
+        s32 xfp = tofix(div_cases[i].xd, r);
+        s32 yfp = tofix(div_cases[i].yd, r);
         double expected = div_cases[i].xd / div_cases[i].yd;
         s32 d64 = FR_DIV(xfp, r, yfp, r);
         s32 d32 = FR_DIV32(xfp, r, yfp, r);
@@ -667,8 +703,8 @@ static void section_trig_int(void) {
         double exp_sin = sin(deg * M_PI / 180.0);
         double act_cos = frd(FR_CosI((s16)deg), FR_TRIG_OUT_PREC);
         double act_sin = frd(FR_SinI((s16)deg), FR_TRIG_OUT_PREC);
-        stats_add(&cos_stats, deg, act_cos, exp_cos);
-        stats_add(&sin_stats, deg, act_sin, exp_sin);
+        stats_add(&cos_stats, deg, act_cos, exp_cos, 1.0);
+        stats_add(&sin_stats, deg, act_sin, exp_sin, 1.0);
     }
 
     table_header_stats();
@@ -684,7 +720,7 @@ static void section_trig_int(void) {
         if (deg % 90 == 0 && deg != 0) { tan_skipped++; continue; }
         double exp_tan = tan(deg * M_PI / 180.0);
         double act_tan = frd(FR_TanI((s16)deg), FR_TRIG_OUT_PREC);
-        stats_add(&tan_stats, deg, act_tan, exp_tan);
+        stats_add(&tan_stats, deg, act_tan, exp_tan, TAN_CLAMP);
     }
     table_header_stats();
     table_row_stats("FR_TanI [-89..89]", &tan_stats);
@@ -722,8 +758,8 @@ static void section_trig_frac(void) {
         double exp_s = sin(deg_d * M_PI / 180.0);
         double act_c = frd(FR_Cos(deg_fr, 8), FR_TRIG_OUT_PREC);
         double act_s = frd(FR_Sin(deg_fr, 8), FR_TRIG_OUT_PREC);
-        stats_add(&cos_f, deg_d, act_c, exp_c);
-        stats_add(&sin_f, deg_d, act_s, exp_s);
+        stats_add(&cos_f, deg_d, act_c, exp_c, 1.0);
+        stats_add(&sin_f, deg_d, act_s, exp_s, 1.0);
     }
     table_header_stats();
     table_row_stats("FR_Cos r8 0.25 step", &cos_f);
@@ -759,10 +795,11 @@ static void section_inverse_trig(void) {
     /* radix 15 inputs, output radians at radix 16, 200 samples */
     for (int i = -200; i <= 200; i++) {
         double xd = i / 200.0;
-        s32 fr = (s32)(xd * (1 << 15));
+        s32 fr = tofix(xd, 15);
+        double actual_xd = frd(fr, 15);
         s32 rad = FR_acos(fr, 15, 16);
-        double ref_rad = acos(xd);
-        stats_add(&acos_stats, xd, frd(rad, 16), ref_rad);
+        double ref_rad = acos(actual_xd);
+        stats_add(&acos_stats, actual_xd, frd(rad, 16), ref_rad, M_PI);
     }
     table_header_stats();
     table_row_stats("FR_acos vs acos() (rad)", &acos_stats);
@@ -773,10 +810,11 @@ static void section_inverse_trig(void) {
     stats_reset(&asin_stats);
     for (int i = -200; i <= 200; i++) {
         double xd = i / 200.0;
-        s32 fr = (s32)(xd * (1 << 15));
+        s32 fr = tofix(xd, 15);
+        double actual_xd = frd(fr, 15);
         s32 rad = FR_asin(fr, 15, 16);
-        double ref_rad = asin(xd);
-        stats_add(&asin_stats, xd, frd(rad, 16), ref_rad);
+        double ref_rad = asin(actual_xd);
+        stats_add(&asin_stats, actual_xd, frd(rad, 16), ref_rad, M_PI);
     }
     table_header_stats();
     table_row_stats("FR_asin vs asin() (rad)", &asin_stats);
@@ -812,13 +850,13 @@ static void section_pow_log(void) {
     stats_t pow2_stats; stats_reset(&pow2_stats);
     for (int i = 0; i < (int)(sizeof(pow2_inputs)/sizeof(pow2_inputs[0])); i++) {
         double x = pow2_inputs[i];
-        s32 fr = (s32)(x * (1L << 16));
+        s32 fr = tofix(x, 16);
         s32 r = FR_pow2(fr, 16);
         double rd = frd(r, 16);
         double ref = pow(2.0, x);
         double err = rd - ref; if (err < 0) err = -err;
         double rel = ref != 0.0 ? err / fabs(ref) : err;
-        stats_add(&pow2_stats, x, rd, ref);
+        stats_add(&pow2_stats, x, rd, ref, pow(2.0, 8.0));
         printf("| %.4g | %ld | %.6g | %.6g | %.4g | %.4g |\n",
                x, (long)r, rd, ref, err, rel);
     }
@@ -831,11 +869,12 @@ static void section_pow_log(void) {
     stats_t pow2_fine; stats_reset(&pow2_fine);
     for (int i = -800; i <= 800; i++) {
         double x = i / 100.0;
-        s32 fr = (s32)(x * (1L << 16));
+        s32 fr = tofix(x, 16);
+        double actual_x = frd(fr, 16);
         s32 r = FR_pow2(fr, 16);
         double rd = frd(r, 16);
-        double ref = pow(2.0, x);
-        stats_add(&pow2_fine, x, rd, ref);
+        double ref = pow(2.0, actual_x);
+        stats_add(&pow2_fine, actual_x, rd, ref, pow(2.0, 8.0));
     }
     table_header_stats();
     table_row_stats("FR_pow2 [-8,8] step 0.01", &pow2_fine);
@@ -870,7 +909,7 @@ static void section_pow_log(void) {
         printf("| %ld | %u | %u | %ld | %.6g | %.6g |\n",
                (long)log2_cases[i].in, log2_cases[i].r, log2_cases[i].or_,
                (long)r, rd, log2_cases[i].ref);
-        stats_add(&log2_stats, (double)log2_cases[i].in, rd, log2_cases[i].ref);
+        stats_add(&log2_stats, (double)log2_cases[i].in, rd, log2_cases[i].ref, log2(32000.0));
     }
     printf("\n");
     table_header_stats();
@@ -883,11 +922,11 @@ static void section_pow_log(void) {
     double ln_inputs[] = {1, 2, M_E, 4, 8, 10, 100, 1000};
     stats_t ln_stats; stats_reset(&ln_stats);
     for (int i = 0; i < (int)(sizeof(ln_inputs)/sizeof(ln_inputs[0])); i++) {
-        s32 fr = (s32)(ln_inputs[i] * (1L << 16));
+        s32 fr = tofix(ln_inputs[i], 16);
         s32 r = FR_ln(fr, 16, 16);
         double rd = frd(r, 16);
         double ref = log(ln_inputs[i]);
-        stats_add(&ln_stats, ln_inputs[i], rd, ref);
+        stats_add(&ln_stats, ln_inputs[i], rd, ref, log(32000.0));
         printf("| %.4g | %ld | %.6g | %.6g |\n", ln_inputs[i], (long)r, rd, ref);
     }
     printf("\n");
@@ -900,11 +939,11 @@ static void section_pow_log(void) {
     double log10_inputs[] = {1, 2, 5, 10, 100, 1000, 10000};
     stats_t log10_stats; stats_reset(&log10_stats);
     for (int i = 0; i < (int)(sizeof(log10_inputs)/sizeof(log10_inputs[0])); i++) {
-        s32 fr = (s32)(log10_inputs[i] * (1L << 16));
+        s32 fr = tofix(log10_inputs[i], 16);
         s32 r = FR_log10(fr, 16, 16);
         double rd = frd(r, 16);
         double ref = log10(log10_inputs[i]);
-        stats_add(&log10_stats, log10_inputs[i], rd, ref);
+        stats_add(&log10_stats, log10_inputs[i], rd, ref, log10(32000.0));
         printf("| %.4g | %ld | %.6g | %.6g |\n", log10_inputs[i], (long)r, rd, ref);
     }
     printf("\n");
@@ -915,14 +954,14 @@ static void section_pow_log(void) {
     md_h3("8.6 FR_EXP and FR_POW10 macros (wrap FR_pow2)");
     printf("| Expression | Result | as double | Reference | Note |\n|---|---:|---:|---:|---|\n");
     {
-        s32 in = (s32)(1.0 * (1L << 16));
+        s32 in = tofix(1.0, 16);
         s32 r = FR_EXP(in, 16);
         double rd = frd(r, 16);
         printf("| FR_EXP(1.0,16) | %ld | %.6g | %.6g | exp(1) = e |\n",
                (long)r, rd, M_E);
     }
     {
-        s32 in = (s32)(2.0 * (1L << 16));
+        s32 in = tofix(2.0, 16);
         s32 r = FR_POW10(in, 16);
         double rd = frd(r, 16);
         printf("| FR_POW10(2.0,16) | %ld | %.6g | %.6g | 10^2 = 100 |\n",
@@ -1251,14 +1290,15 @@ static void section_v2_new(void) {
     stats_t sqrt_stats; stats_reset(&sqrt_stats);
     for (int i = 0; i < (int)(sizeof(sqrt_inputs)/sizeof(sqrt_inputs[0])); i++) {
         double x = sqrt_inputs[i];
-        s32 fr = (s32)(x * (1L << 16));
+        s32 fr = tofix(x, 16);
+        double actual_x = frd(fr, 16);
         s32 r = FR_sqrt(fr, 16);
         double rd = frd(r, 16);
-        double ref = sqrt(x);
+        double ref = sqrt(actual_x);
         double err = rd - ref; if (err < 0) err = -err;
-        stats_add(&sqrt_stats, x, rd, ref);
+        stats_add(&sqrt_stats, actual_x, rd, ref, sqrt(32000.0));
         printf("| %.6g | %ld | %.6g | %.6g | %.4g |\n",
-               x, (long)r, rd, ref, err);
+               actual_x, (long)r, rd, ref, err);
     }
     printf("\n");
     table_header_stats();
@@ -1269,11 +1309,12 @@ static void section_v2_new(void) {
     stats_t sqrt_fine; stats_reset(&sqrt_fine);
     for (int i = 1; i <= 1000; i++) {
         double x = i * 10.0;     /* 10..10000 */
-        s32 fr = (s32)(x * (1L << 16));
+        s32 fr = tofix(x, 16);
+        double actual_x = frd(fr, 16);
         s32 r = FR_sqrt(fr, 16);
         double rd = frd(r, 16);
-        double ref = sqrt(x);
-        stats_add(&sqrt_fine, x, rd, ref);
+        double ref = sqrt(actual_x);
+        stats_add(&sqrt_fine, actual_x, rd, ref, sqrt(32000.0));
     }
     table_header_stats();
     table_row_stats("FR_sqrt [10,10000]", &sqrt_fine);
@@ -1299,16 +1340,16 @@ static void section_v2_new(void) {
     };
     stats_t hyp_stats; stats_reset(&hyp_stats);
     for (int i = 0; i < (int)(sizeof(hyp_cases)/sizeof(hyp_cases[0])); i++) {
-        s32 fx = (s32)(hyp_cases[i].x * (1L << 16));
-        s32 fy = (s32)(hyp_cases[i].y * (1L << 16));
+        s32 fx = tofix(hyp_cases[i].x, 16);
+        s32 fy = tofix(hyp_cases[i].y, 16);
+        double actual_x = frd(fx, 16), actual_y = frd(fy, 16);
         s32 r  = FR_hypot(fx, fy, 16);
         double rd = frd(r, 16);
-        double ref = hypot(hyp_cases[i].x, hyp_cases[i].y);
+        double ref = hypot(actual_x, actual_y);
         double err = rd - ref; if (err < 0) err = -err;
-        stats_add(&hyp_stats, sqrt(hyp_cases[i].x*hyp_cases[i].x + hyp_cases[i].y*hyp_cases[i].y),
-                  rd, ref);
+        stats_add(&hyp_stats, ref, rd, ref, hypot(1000.0, 1000.0));
         printf("| %g | %g | %ld | %.6g | %.6g | %.4g |\n",
-               hyp_cases[i].x, hyp_cases[i].y, (long)r, rd, ref, err);
+               actual_x, actual_y, (long)r, rd, ref, err);
     }
     printf("\n");
     table_header_stats();
@@ -1320,17 +1361,17 @@ static void section_v2_new(void) {
     printf("|---:|---:|---:|---:|---:|---:|---:|\n");
     stats_t hf8_stats; stats_reset(&hf8_stats);
     for (int i = 0; i < (int)(sizeof(hyp_cases)/sizeof(hyp_cases[0])); i++) {
-        s32 fx = (s32)(hyp_cases[i].x * (1L << 16));
-        s32 fy = (s32)(hyp_cases[i].y * (1L << 16));
+        s32 fx = tofix(hyp_cases[i].x, 16);
+        s32 fy = tofix(hyp_cases[i].y, 16);
+        double actual_x = frd(fx, 16), actual_y = frd(fy, 16);
         s32 r  = FR_hypot_fast8(fx, fy);
         double rd = frd(r, 16);
-        double ref = hypot(hyp_cases[i].x, hyp_cases[i].y);
+        double ref = hypot(actual_x, actual_y);
         double err = rd - ref; if (err < 0) err = -err;
         double rel = (ref > 0) ? err / ref * 100.0 : 0.0;
-        stats_add(&hf8_stats, sqrt(hyp_cases[i].x*hyp_cases[i].x + hyp_cases[i].y*hyp_cases[i].y),
-                  rd, ref);
+        stats_add(&hf8_stats, ref, rd, ref, hypot(1000.0, 1000.0));
         printf("| %g | %g | %ld | %.6g | %.6g | %.4g | %.4g |\n",
-               hyp_cases[i].x, hyp_cases[i].y, (long)r, rd, ref, err, rel);
+               actual_x, actual_y, (long)r, rd, ref, err, rel);
     }
     printf("\n");
     table_header_stats();
@@ -1386,7 +1427,7 @@ static void section_v2_new(void) {
             else if (t < 0.50) ideal =  2.0 - 4.0 * t;       /* 1 → 0 */
             else if (t < 0.75) ideal = -4.0 * (t - 0.5);     /* 0 → -1 */
             else               ideal = -1.0 + 4.0 * (t - 0.75); /* -1 → 0 */
-            stats_add(&tri_stats, t * 360.0, (double)actual / 32767.0, ideal);
+            stats_add(&tri_stats, t * 360.0, (double)actual / 32767.0, ideal, 1.0);
         }
         table_header_stats();
         table_row_stats("fr_wave_tri vs ideal", &tri_stats);
@@ -1472,8 +1513,8 @@ static void section_multiradix(void) {
     int log2_radixes[] = {8, 12, 16, 24};
     for (int ri = 0; ri < 4; ri++) {
         int R = log2_radixes[ri];
-        double scale = (double)(1L << R);
-        double max_val = (double)((1L << (30 - R)));  /* stay well within s32 */
+        double scale = ldexp(1.0, R);
+        double max_val = ldexp(1.0, 30 - R);  /* stay well within s32 */
         stats_t st; stats_reset(&st);
 
         /* Sweep from 0.125 to max representable value */
@@ -1484,24 +1525,26 @@ static void section_multiradix(void) {
 
         for (int i = 0; i < ninp; i++) {
             if (inputs[i] > max_val) continue;   /* would overflow s32 */
-            s32 fr = (s32)(inputs[i] * scale);
+            s32 fr = tofix(inputs[i], R);
             if (fr <= 0) continue;
+            double actual_x = frd(fr, R);
             s32 r = FR_log2(fr, (u16)R, (u16)R);
             double rd = frd(r, R);
-            double ref = log2(inputs[i]);
-            stats_add(&st, inputs[i], rd, ref);
+            double ref = log2(actual_x);
+            stats_add(&st, actual_x, rd, ref, log2(32000.0));
         }
 
         /* Fine-grained sweep in [1, min(100, max_val)] */
         double sweep_max = max_val < 100.0 ? max_val : 100.0;
         for (int i = 1; i <= 500; i++) {
             double x = 1.0 + ((sweep_max - 1.0) * i / 500.0);
-            s32 fr = (s32)(x * scale);
+            s32 fr = tofix(x, R);
             if (fr <= 0) continue;
+            double actual_x = frd(fr, R);
             s32 r = FR_log2(fr, (u16)R, (u16)R);
             double rd = frd(r, R);
-            double ref = log2(x);
-            stats_add(&st, x, rd, ref);
+            double ref = log2(actual_x);
+            stats_add(&st, actual_x, rd, ref, log2(32000.0));
         }
 
         double lsb = 1.0 / scale;
@@ -1521,19 +1564,20 @@ static void section_multiradix(void) {
 
     for (int ri = 0; ri < 4; ri++) {
         int R = log2_radixes[ri];
-        double scale = (double)(1L << R);
-        double max_val = (double)((1L << (30 - R)));
+        double scale = ldexp(1.0, R);
+        double max_val = ldexp(1.0, 30 - R);
         double sweep_max = max_val < 100.0 ? max_val : 100.0;
         stats_t st; stats_reset(&st);
 
         for (int i = 1; i <= 500; i++) {
             double x = 0.5 + ((sweep_max - 0.5) * i / 500.0);
-            s32 fr = (s32)(x * scale);
+            s32 fr = tofix(x, R);
             if (fr <= 0) continue;
+            double actual_x = frd(fr, R);
             s32 r = FR_ln(fr, (u16)R, (u16)R);
             double rd = frd(r, R);
-            double ref = log(x);
-            stats_add(&st, x, rd, ref);
+            double ref = log(actual_x);
+            stats_add(&st, actual_x, rd, ref, log(32000.0));
         }
 
         double lsb = 1.0 / scale;
@@ -1553,19 +1597,20 @@ static void section_multiradix(void) {
 
     for (int ri = 0; ri < 4; ri++) {
         int R = log2_radixes[ri];
-        double scale = (double)(1L << R);
-        double max_val = (double)((1L << (30 - R)));
+        double scale = ldexp(1.0, R);
+        double max_val = ldexp(1.0, 30 - R);
         double sweep_max = max_val < 1000.0 ? max_val : 1000.0;
         stats_t st; stats_reset(&st);
 
         for (int i = 1; i <= 500; i++) {
             double x = 0.5 + ((sweep_max - 0.5) * i / 500.0);
-            s32 fr = (s32)(x * scale);
+            s32 fr = tofix(x, R);
             if (fr <= 0) continue;
+            double actual_x = frd(fr, R);
             s32 r = FR_log10(fr, (u16)R, (u16)R);
             double rd = frd(r, R);
-            double ref = log10(x);
-            stats_add(&st, x, rd, ref);
+            double ref = log10(actual_x);
+            stats_add(&st, actual_x, rd, ref, log10(32000.0));
         }
 
         double lsb = 1.0 / scale;
@@ -1586,8 +1631,8 @@ static void section_multiradix(void) {
     int div_radixes[] = {8, 12, 16, 20};
     for (int ri = 0; ri < 4; ri++) {
         int R = div_radixes[ri];
-        double scale = (double)(1L << R);
-        double max_val = (double)(1L << (30 - R));  /* stay within s32 */
+        double scale = ldexp(1.0, R);
+        double max_val = ldexp(1.0, 30 - R);  /* stay within s32 */
         stats_t st_rnd, st_trunc;
         stats_reset(&st_rnd);
         stats_reset(&st_trunc);
@@ -1606,18 +1651,18 @@ static void section_multiradix(void) {
                 double aq = ay > 0 ? ax / ay : 1e30;
                 /* Skip if inputs or quotient would overflow s32 at this radix */
                 if (ax >= max_val || ay >= max_val || aq >= max_val) continue;
-                s32 xfp = (s32)(x * scale);
-                s32 yfp = (s32)(y * scale);
+                s32 xfp = tofix(x, R);
+                s32 yfp = tofix(y, R);
                 if (yfp == 0) continue;
-                double ref = x / y;
+                double ref = frd(xfp, R) / frd(yfp, R);
 
                 s32 d_rnd   = FR_DIV(xfp, R, yfp, R);
                 s32 d_trunc = FR_DIV_TRUNC(xfp, R, yfp, R);
                 double rd_rnd   = frd(d_rnd, R);
                 double rd_trunc = frd(d_trunc, R);
 
-                stats_add(&st_rnd,   x / y, rd_rnd,   ref);
-                stats_add(&st_trunc, x / y, rd_trunc, ref);
+                stats_add(&st_rnd,   x / y, rd_rnd,   ref, 1.0);
+                stats_add(&st_trunc, x / y, rd_trunc, ref, 1.0);
             }
         }
 
@@ -1644,9 +1689,9 @@ static void section_multiradix(void) {
     };
     for (int i = 0; i < (int)(sizeof(sign_cases)/sizeof(sign_cases[0])); i++) {
         int R = sign_cases[i].r;
-        double scale = (double)(1L << R);
-        s32 xfp = (s32)(sign_cases[i].x * scale);
-        s32 yfp = (s32)(sign_cases[i].y * scale);
+        double scale = ldexp(1.0, R);
+        s32 xfp = tofix(sign_cases[i].x, R);
+        s32 yfp = tofix(sign_cases[i].y, R);
         s32 d = FR_DIV(xfp, R, yfp, R);
         double rd = frd(d, R);
         double ref = sign_cases[i].x / sign_cases[i].y;
@@ -1669,7 +1714,7 @@ static void section_multiradix(void) {
     int exp_radixes[] = {8, 12, 16, 20};
     for (int ri = 0; ri < 4; ri++) {
         int R = exp_radixes[ri];
-        double scale = (double)(1L << R);
+        double scale = ldexp(1.0, R);
         stats_t st_exp, st_pow10;
         stats_reset(&st_exp);
         stats_reset(&st_pow10);
@@ -1677,23 +1722,25 @@ static void section_multiradix(void) {
         /* Sweep exp(x) for x in [-4, 4] in steps of 0.05 */
         for (int i = -80; i <= 80; i++) {
             double x = i / 20.0;
-            s32 fr = (s32)(x * scale);
+            s32 fr = tofix(x, R);
+            double actual_x = frd(fr, R);
             s32 r = FR_EXP(fr, R);
             double rd = frd(r, R);
-            double ref = exp(x);
-            if (r != FR_OVERFLOW_POS && ref < (double)(1L << (31 - R)))
-                stats_add(&st_exp, x, rd, ref);
+            double ref = exp(actual_x);
+            if (r != FR_OVERFLOW_POS && ref < ldexp(1.0, 31 - R))
+                stats_add(&st_exp, actual_x, rd, ref, 32000.0);
         }
 
         /* Sweep pow10(x) for x in [-2, 2] in steps of 0.05 */
         for (int i = -40; i <= 40; i++) {
             double x = i / 20.0;
-            s32 fr = (s32)(x * scale);
+            s32 fr = tofix(x, R);
+            double actual_x = frd(fr, R);
             s32 r = FR_POW10(fr, R);
             double rd = frd(r, R);
-            double ref = pow(10.0, x);
-            if (r != FR_OVERFLOW_POS && ref < (double)(1L << (31 - R)))
-                stats_add(&st_pow10, x, rd, ref);
+            double ref = pow(10.0, actual_x);
+            if (r != FR_OVERFLOW_POS && ref < ldexp(1.0, 31 - R))
+                stats_add(&st_pow10, actual_x, rd, ref, 32000.0);
         }
 
         double lsb = 1.0 / scale;
@@ -1725,10 +1772,11 @@ static void section_summary(void) {
     printf("| FR_FixMulSat | OK | 4.2, 4.3 | int64 fast path with round-to-nearest and explicit saturation |\n");
     printf("| FR_FixAddSat | OK | 4.4, 4.5 | Saturation behaves identically on LP64 host and ILP32 MCU |\n");
     printf("| FR_CosI / FR_SinI | OK | 5 | s15.16 output; exact at poles; max abs error ~1.5e-5 (1 LSB s15.16) over [-720, +720]; macros routing to fr_*_bam |\n");
-    printf("| FR_TanI (integer degrees) | OK | 5.1, 5.2 | Routed through BAM trig |\n");
+    printf("| FR_TanI (integer degrees) | OK | 5.1, 5.2 | BAM table lookup; 65-entry octant table; no 64-bit division |\n");
     printf("| FR_Cos / FR_Sin (interpolated) | OK | 6.1 | Within LSB-level error for r8 inputs in s16 |\n");
-    printf("| FR_Tan (interpolated) | OK | 6.2 | Locals are s32 |\n");
+    printf("| FR_Tan (interpolated) | OK | 6.2 | Via fr_tan_bam; 65-entry octant table |\n");
     printf("| fr_cos / fr_sin / fr_cos_bam / fr_sin_bam / fr_cos_deg / fr_sin_deg | OK | 6 | s15.16 output; 129-entry quadrant table with round-to-nearest linear interp; exact at cardinal angles |\n");
+    printf("| fr_tan_bam | OK | 14 | 65-entry octant table; first-octant lerp, second-octant 32-bit reciprocal; no 64-bit |\n");
     printf("| FR_acos | OK | 7.1 | Max error ~0.83° over [-1, +1] swept at 200 points |\n");
     printf("| FR_asin | OK | 7.2 | Same precision as FR_acos |\n");
     printf("| FR_atan2 | OK | 7.3 | Via asin/acos + hypot_fast8; 129-entry cos table; `FR_atan2(y, x, out_radix)` returns radians |\n");
@@ -1779,68 +1827,193 @@ static void section_summary(void) {
  * README.md, docs/README.md, and pages/index.html.
  * ============================================================ */
 
+/* ── Neighborhood printer ──────────────────────────────────────────
+ * Print a markdown table of the samples center_i-half .. center_i+half
+ * (index wrapped mod N) of one trig sweep, with expected/got/error columns.
+ * range_lo/range_hi: sweep bounds for types 3-5 (radians) and 9+ (degrees);
+ * types 6-8 use (int)range_lo as the degree at i=0; BAM types ignore both.
+ *   func_type: 0 = fr_sin_bam  1 = fr_cos_bam  2 = fr_tan_bam  3 = fr_sin  4 = fr_cos  5 = fr_tan
+ *              6 = FR_SinI  7 = FR_CosI  8 = FR_TanI  9 = FR_Sin  10 = FR_Cos  other = FR_Tan (radix-16 degrees)
+ */
+static void neighborhood(const char *label, int func_type,
+                          int center_i, int half, int N,
+                          double range_lo, double range_hi)
+{
+    printf("\n**Neighborhood: %s (center i=%d ±%d)**\n\n", label, center_i, half);
+    printf("| i | deg | input_fp | expected | got | abs_err | pct_err |\n");
+    printf("|---|---|---|---|---|---|---|\n");
+
+    for (int k = -half; k <= half; k++) {
+        int i = (center_i + k % N + N) % N;  /* wrap the sample index into [0, N) (assumes center_i >= 0) */
+        double deg, angle, exp_v, got_v;
+        s32 fp;
+
+        switch (func_type) {
+        case 0: case 1: case 2: { /* BAM: i is the u16 angle; tan poles use the accuracy table's ±TAN_CLAMP references */
+            u16 bam = (u16)i;
+            deg = bam * 360.0 / 65536.0;
+            angle = deg * M_PI / 180.0;
+            if (func_type == 0)      { exp_v = q16(sin(angle));     got_v = frd(fr_sin_bam(bam), 16); }
+            else if (func_type == 1) { exp_v = q16(cos(angle));     got_v = frd(fr_cos_bam(bam), 16); }
+            else                     { exp_v = q16((bam == 16384) ? TAN_CLAMP : (bam == 49152) ? -TAN_CLAMP : tan_ref(angle)); got_v = frd(fr_tan_bam(bam), 16); }
+            fp = (s32)bam;
+            break;
+        }
+        case 3: case 4: case 5: { /* radians: sample i of N across [range_lo, range_hi); reference uses the quantized input */
+            angle = range_lo + (range_hi - range_lo) * i / (double)N;
+            fp = tofix(angle, 16);
+            double actual_angle = frd(fp, 16);
+            deg = actual_angle * 180.0 / M_PI;
+            if (func_type == 3)      { exp_v = q16(sin(actual_angle));     got_v = frd(fr_sin(fp, 16), 16); }
+            else if (func_type == 4) { exp_v = q16(cos(actual_angle));     got_v = frd(fr_cos(fp, 16), 16); }
+            else                     { exp_v = q16(tan_ref(actual_angle)); got_v = frd(fr_tan(fp, 16), 16); }
+            break;
+        }
+        case 6: case 7: case 8: { /* integer degrees: d = (int)range_lo + i */
+            int d = (int)range_lo + i;
+            deg = (double)d;
+            angle = d * M_PI / 180.0;
+            fp = (s32)d;
+            if (func_type == 6)      { exp_v = q16(sin(angle));     got_v = frd(FR_SinI(d), 16); }
+            else if (func_type == 7) { exp_v = q16(cos(angle));     got_v = frd(FR_CosI(d), 16); }
+            else                     { exp_v = q16(tan_ref(angle)); got_v = frd(FR_TanI((s16)d), 16); }
+            break;
+        }
+        default: { /* radix-16 degrees: sample i of N across [range_lo, range_hi); reference uses the quantized input */
+            deg = range_lo + (range_hi - range_lo) * i / (double)N;
+            fp = tofix(deg, 16);
+            double actual_deg = frd(fp, 16);
+            angle = actual_deg * M_PI / 180.0;
+            if (func_type == 9)       { exp_v = q16(sin(angle));     got_v = frd(FR_Sin(fp, 16), 16); }
+            else if (func_type == 10) { exp_v = q16(cos(angle));     got_v = frd(FR_Cos(fp, 16), 16); }
+            else                      { exp_v = q16(tan_ref(angle)); got_v = frd(FR_Tan(fp, 16), 16); }
+            break;
+        }
+        }
+
+        double ae = fabs(got_v - exp_v);  /* pe below is unclamped: 100% when expected == 0 but got != 0 */
+        double pe = (exp_v != 0.0) ? ae / fabs(exp_v) * 100.0 : (ae != 0.0 ? 100.0 : 0.0);
+        printf("| %d | %.6f | %d | %.6f | %.6f | %.6f | %.4f%% |\n",
+               i, deg, (int)fp, exp_v, got_v, ae, pe);
+    }
+    printf("\n");
+}
+
 static void section_accuracy_table(void) {
     md_h2("14. Accuracy Summary Table");
 
     printf("<!-- ACCURACY_TABLE_START -->\n");
     if (g_showpeak) {
-        printf("| Function | Max err (%%) | Avg err (%%) | Note | Peak at |\n");
+        printf("| Function | Max err (%%)*| Avg err (%%) | Note | Peak at |\n");
         printf("|---|---:|---:|---|---:|\n");
     } else {
-        printf("| Function | Max err (%%) | Avg err (%%) | Note |\n");
+        printf("| Function | Max err (%%)*| Avg err (%%) | Note |\n");
         printf("|---|---:|---:|---|\n");
     }
 
     const int R = 16;
-    const double scale = (double)(1L << R);
+    const double scale = ldexp(1.0, R);
 
     /* Persistent stats so we can print diagnostics after the table */
     stats_t st_sincos, st_tan, st_asincos, st_atan2;
+    stats_t st_rad2bam, st_deg2bam, st_sincos_deg_s32, st_tan_deg_s32;
     stats_reset(&st_sincos); stats_reset(&st_tan);
     stats_reset(&st_asincos); stats_reset(&st_atan2);
+    stats_reset(&st_rad2bam); stats_reset(&st_deg2bam);
+    stats_reset(&st_sincos_deg_s32); stats_reset(&st_tan_deg_s32);
 
-    /* --- sin / cos --- */
+    /* --- sin / cos (BAM native: 65536-pt) --- */
+    {
+        stats_t st; stats_reset(&st);
+        for (int i = 0; i < 65536; i++) {
+            u16 bam = (u16)i;
+            double rad = bam * 2.0 * M_PI / 65536.0;
+            stats_add(&st, (double)bam, frd(fr_sin_bam(bam), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0);
+            stats_add(&st, (double)bam, frd(fr_cos_bam(bam), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0);
+        }
+        acc_row("sin/cos (BAM)", &st, "very fast binary angle trig");
+    }
+
+    /* --- sin / cos (degree wrappers: 65536-pt at s15.16) --- */
     {
         stats_t &st = st_sincos;
-        const u16 radix = 7; /* s8.7 degrees: 128 steps/deg, [-256°,+256°) */
-        /* 65536-point sweep: all s16 values at radix 7 cover > full circle */
-        for (int i = -32768; i <= 32767; i++) {
-            double deg = (double)i / (1 << radix);
-            double rad = deg * M_PI / 180.0;
-            stats_add(&st, deg, frd(FR_Sin((s16)i, radix), FR_TRIG_OUT_PREC), sin(rad));
-            stats_add(&st, deg, frd(FR_Cos((s16)i, radix), FR_TRIG_OUT_PREC), cos(rad));
+        const u16 radix = 16;
+        for (int i = 0; i < 65536; i++) {
+            double deg = -360.0 + (720.0 * i / 65536.0);
+            s32 deg_fp = tofix(deg, radix);
+            double actual_deg = frd(deg_fp, radix);
+            double rad = actual_deg * M_PI / 180.0;
+            stats_add(&st, actual_deg, frd(FR_Sin(deg_fp, radix), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0);
+            stats_add(&st, actual_deg, frd(FR_Cos(deg_fp, radix), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0);
         }
-        /* Special cases: exact integer degrees including negative */
         s16 specials[] = {0,30,45,60,90,120,135,150,180,210,225,240,270,300,315,330,360,
                           -30,-45,-60,-90,-120,-135,-150,-180,-210,-225,-240,-270,-300,-315,-330,-360};
         for (int si = 0; si < (int)(sizeof(specials)/sizeof(specials[0])); si++) {
             s16 d = specials[si];
             double rad = d * M_PI / 180.0;
-            stats_add(&st, d, frd(FR_SinI(d), FR_TRIG_OUT_PREC), sin(rad));
-            stats_add(&st, d, frd(FR_CosI(d), FR_TRIG_OUT_PREC), cos(rad));
+            stats_add(&st, d, frd(FR_SinI(d), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0);
+            stats_add(&st, d, frd(FR_CosI(d), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0);
+        }
+        acc_row("sin/cos (deg)", &st, "degree input trig fns");
+    }
+
+    /* --- sin / cos (radian wrappers: 65536-pt) --- */
+    {
+        stats_t st; stats_reset(&st);
+        for (int i = 0; i < 65536; i++) {
+            double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0);
+            s32 rad_fp = tofix(angle, 16);
+            double actual_angle = frd(rad_fp, 16);
+            stats_add(&st, actual_angle, frd(fr_sin(rad_fp, 16), FR_TRIG_OUT_PREC), q16(sin(actual_angle)), 1.0);
+            stats_add(&st, actual_angle, frd(fr_cos(rad_fp, 16), FR_TRIG_OUT_PREC), q16(cos(actual_angle)), 1.0);
+        }
+        acc_row("sin/cos (rad)", &st, "radian (traditional) trig");
+    }
+
+    /* --- tan (BAM native: 65536-pt, full sweep) --- */
+    {
+        stats_t st; stats_reset(&st);
+        for (int i = 0; i < 65536; i++) {
+            u16 bam = (u16)i;
+            double ref;
+            if (bam == 16384)       ref =  TAN_CLAMP;  /* 90°: +maxint */
+            else if (bam == 49152)  ref = -TAN_CLAMP;  /* 270°: -maxint */
+            else                    ref = tan_ref(bam * 2.0 * M_PI / 65536.0);
+            stats_add(&st, (double)bam, frd(fr_tan_bam(bam), FR_TRIG_OUT_PREC), q16(ref), TAN_CLAMP);
         }
-        acc_row("sin / cos", &st, "65536-pt sweep + specials");
+        acc_row("tan (BAM)", &st, "binary angle tangent; ±maxint at poles");
     }
 
-    /* --- tan --- */
+    /* --- tan (degree wrappers: 65536-pt at s15.16, full sweep) --- */
     {
         stats_t &st = st_tan;
-        const u16 radix = 7;
-        for (int i = -32768; i <= 32767; i++) {
-            double deg = (double)i / (1 << radix);
-            double rad = deg * M_PI / 180.0;
-            /* Skip near poles: |cos| < 0.01 → tan > 100 */
-            if (fabs(cos(rad)) < 0.01) continue;
-            stats_add(&st, deg, frd(FR_Tan((s16)i, radix), FR_TRIG_OUT_PREC), tan(rad));
+        const u16 radix = 16;
+        for (int i = 0; i < 65536; i++) {
+            double deg = -360.0 + (720.0 * i / 65536.0);
+            s32 deg_fp = tofix(deg, radix);
+            double actual_deg = frd(deg_fp, radix);
+            double rad = actual_deg * M_PI / 180.0;
+            stats_add(&st, actual_deg, frd(FR_Tan(deg_fp, radix), FR_TRIG_OUT_PREC), q16(tan_ref(rad)), TAN_CLAMP);
         }
-        /* Special cases: integer degrees (avoiding poles) */
         s16 specials[] = {0,30,45,60,-30,-45,-60,120,135,150,-120,-135,-150};
         for (int si = 0; si < (int)(sizeof(specials)/sizeof(specials[0])); si++) {
             s16 d = specials[si];
             double rad = d * M_PI / 180.0;
-            stats_add(&st, d, frd(FR_TanI(d), FR_TRIG_OUT_PREC), tan(rad));
+            stats_add(&st, d, frd(FR_TanI(d), FR_TRIG_OUT_PREC), q16(tan_ref(rad)), TAN_CLAMP);
         }
-        acc_row("tan", &st, "65536-pt sweep (skip poles)");
+        acc_row("tan (deg)", &st, "degree input tangent; saturated at poles");
+    }
+
+    /* --- tan (radian wrappers: 65536-pt, full sweep) --- */
+    {
+        stats_t st; stats_reset(&st);
+        for (int i = 0; i < 65536; i++) {
+            double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0);
+            s32 rad_fp = tofix(angle, 16);
+            double actual_angle = frd(rad_fp, 16);
+            stats_add(&st, actual_angle, frd(fr_tan(rad_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(actual_angle)), TAN_CLAMP);
+        }
+        acc_row("tan (rad)", &st, "radian (traditional) tangent");
     }
 
     /* --- asin / acos --- */
@@ -1848,14 +2021,14 @@ static void section_accuracy_table(void) {
         stats_t &st = st_asincos;
         /* 65536-point sweep: all representable values at radix 15 over [-1, +1) */
         for (int i = -32768; i <= 32767; i++) {
-            double xd = (double)i / (1 << 15);
+            double xd = (double)i / 32768.0;
             if (xd < -1.0 || xd > 1.0) continue;
             s32 rad = FR_asin((s32)i, 15, R);
-            stats_add(&st, xd, frd(rad, R), asin(xd));
+            stats_add(&st, xd, frd(rad, R), q16(asin(xd)), M_PI);
             rad = FR_acos((s32)i, 15, R);
-            stats_add(&st, xd, frd(rad, R), acos(xd));
+            stats_add(&st, xd, frd(rad, R), q16(acos(xd)), M_PI);
         }
-        acc_row("asin / acos", &st, "65536-pt; sqrt approx near boundary");
+        acc_row("asin / acos", &st, "reverse trig, radian output");
     }
 
     /* --- atan2 --- */
@@ -1874,19 +2047,19 @@ static void section_accuracy_table(void) {
             for (int i = -32767; i <= 32768; i++) {
                 double angle = i * M_PI / 32768.0;
                 double x = rad * cos(angle), y = rad * sin(angle);
-                s32 fx = (s32)(x * scale);
-                s32 fy = (s32)(y * scale);
+                s32 fx = tofix(x, R);
+                s32 fy = tofix(y, R);
                 if (fx == 0 && fy == 0) continue;
                 s32 afx = (fx < 0) ? -fx : fx;
                 s32 afy = (fy < 0) ? -fy : fy;
                 s32 minor = (afx < afy) ? afx : afy;
                 if (minor < 256) continue; /* input quantization, not algo */
                 s32 r = FR_atan2(fy, fx, R);
-                double ref = atan2(y, x);
+                double ref = atan2((double)fy, (double)fx);
                 /* Skip near ±pi branch cut: sign depends on sub-LSB
                  * input quantization, not algorithm accuracy. */
                 if (fabs(fabs(ref) - M_PI) < 0.01) continue;
-                stats_add(&st, angle * 180.0 / M_PI, frd(r, R), ref);
+                stats_add(&st, angle * 180.0 / M_PI, frd(r, R), q16(ref), M_PI);
             }
         }
         /* Special cases: exact quadrant/octant/30-degree angles */
@@ -1895,29 +2068,26 @@ static void section_accuracy_table(void) {
         for (int si = 0; si < (int)(sizeof(specials_deg)/sizeof(specials_deg[0])); si++) {
             double angle = specials_deg[si] * M_PI / 180.0;
             double x = 100.0 * cos(angle), y = 100.0 * sin(angle);
-            s32 fx = (s32)(x * scale), fy = (s32)(y * scale);
+            s32 fx = tofix(x, R), fy = tofix(y, R);
             if (fx == 0 && fy == 0) continue;
             s32 r = FR_atan2(fy, fx, R);
-            stats_add(&st, specials_deg[si], frd(r, R), atan2(y, x));
+            stats_add(&st, specials_deg[si], frd(r, R), q16(atan2((double)fy, (double)fx)), M_PI);
         }
-        acc_row("atan2", &st, "65536x5 radii; asin/acos+hypot_fast8");
+        acc_row("atan2", &st, "reverse tangent, always safe");
     }
 
     /* --- atan --- */
     {
         stats_t st; stats_reset(&st);
-        /* Sweep atan(x) for x in [-10, 10] with fine steps near zero.
-         * FR_atan(input, radix, out_radix) calls FR_atan2(input, 1<<radix, out_radix).
-         * Skip |expected| < 0.01 to match the percent-error convention. */
         for (int i = -10000; i <= 10000; i++) {
             double x = i / 1000.0;
-            s32 fr = (s32)(x * scale);
+            s32 fr = tofix(x, R);
+            double actual_x = frd(fr, R);
             s32 r = FR_atan(fr, (u16)R, (u16)R);
-            double ref = atan(x);
-            if (fabs(ref) < 0.01) continue;
-            stats_add(&st, x, frd(r, R), ref);
+            double ref = atan(actual_x);
+            stats_add(&st, actual_x, frd(r, R), q16(ref), M_PI / 2.0);
         }
-        acc_row("atan", &st, "20001-pt sweep [-10,10]; via FR_atan2");
+        acc_row("atan", &st, "reverse tangent, accepts up to maxint");
     }
 
     /* --- sqrt --- */
@@ -1925,16 +2095,18 @@ static void section_accuracy_table(void) {
         stats_t st; stats_reset(&st);
         double inputs[] = {0.0001, 0.25, 0.5, 1, 2, 3, 4, 7, 9, 16, 25, 100, 1024, 10000, 32000};
         for (int i = 0; i < (int)(sizeof(inputs)/sizeof(inputs[0])); i++) {
-            s32 fr = (s32)(inputs[i] * scale);
+            s32 fr = tofix(inputs[i], R);
+            double actual_x = frd(fr, R);
             s32 r = FR_sqrt(fr, R);
-            stats_add(&st, inputs[i], frd(r, R), sqrt(inputs[i]));
+            stats_add(&st, actual_x, frd(r, R), q16(sqrt(actual_x)), sqrt(32000.0));
         }
         /* Fine sweep */
         for (int i = 1; i <= 1000; i++) {
             double x = i * 10.0;
-            s32 fr = (s32)(x * scale);
+            s32 fr = tofix(x, R);
+            double actual_x = frd(fr, R);
             s32 r = FR_sqrt(fr, R);
-            stats_add(&st, x, frd(r, R), sqrt(x));
+            stats_add(&st, actual_x, frd(r, R), q16(sqrt(actual_x)), sqrt(32000.0));
         }
         acc_row("sqrt", &st, "Round-to-nearest");
     }
@@ -1944,20 +2116,22 @@ static void section_accuracy_table(void) {
         stats_t st; stats_reset(&st);
         /* Integer inputs — stay within s32 range at radix 16 (max ~32767) */
         for (int v = 1; v <= 32000; v += (v < 100 ? 1 : v / 10)) {
-            s32 fr = (s32)((double)v * scale);
+            s32 fr = tofix((double)v, R);
             if (fr <= 0) continue;
+            double actual_v = frd(fr, R);
             s32 r = FR_log2(fr, (u16)R, (u16)R);
-            stats_add(&st, (double)v, frd(r, R), log2((double)v));
+            stats_add(&st, actual_v, frd(r, R), q16(log2(actual_v)), log2(32000.0));
         }
         /* Fractional sweep 0.125 .. 1.0 */
         for (int i = 1; i <= 100; i++) {
             double x = 0.125 + (0.875 * i / 100.0);
-            s32 fr = (s32)(x * scale);
+            s32 fr = tofix(x, R);
             if (fr <= 0) continue;
+            double actual_x = frd(fr, R);
             s32 r = FR_log2(fr, (u16)R, (u16)R);
-            stats_add(&st, x, frd(r, R), log2(x));
+            stats_add(&st, actual_x, frd(r, R), q16(log2(actual_x)), log2(32000.0));
         }
-        acc_row("log2", &st, "65-entry mantissa table");
+        acc_row("log2", &st, "shift/add only for speed");
     }
 
     /* --- pow2 --- */
@@ -1965,12 +2139,13 @@ static void section_accuracy_table(void) {
         stats_t st; stats_reset(&st);
         for (int i = -800; i <= 800; i++) {
             double x = i / 100.0;
-            s32 fr = (s32)(x * scale);
+            s32 fr = tofix(x, R);
+            double actual_x = frd(fr, R);
             s32 r = FR_pow2(fr, R);
-            double ref = pow(2.0, x);
-            stats_add(&st, x, frd(r, R), ref);
+            double ref = pow(2.0, actual_x);
+            stats_add(&st, actual_x, frd(r, R), q16(ref), pow(2.0, 8.0));
         }
-        acc_row("pow2", &st, "65-entry fraction table");
+        acc_row("pow2", &st, "shift/add only for speed");
     }
 
     /* --- ln, log10 --- */
@@ -1978,16 +2153,17 @@ static void section_accuracy_table(void) {
         stats_t st; stats_reset(&st);
         double inputs[] = {0.125, 0.25, 0.5, 1, 2, M_E, 3, 4, 5, 7, 8, 10, 20, 50, 100, 1000};
         for (int i = 0; i < (int)(sizeof(inputs)/sizeof(inputs[0])); i++) {
-            s32 fr = (s32)(inputs[i] * scale);
+            s32 fr = tofix(inputs[i], R);
             if (fr <= 0) continue;
+            double actual_x = frd(fr, R);
             s32 r = FR_ln(fr, R, R);
-            double ref = log(inputs[i]);
-            stats_add(&st, inputs[i], frd(r, R), ref);
+            double ref = log(actual_x);
+            stats_add(&st, actual_x, frd(r, R), q16(ref), log(32000.0));
             r = FR_log10(fr, R, R);
-            ref = log10(inputs[i]);
-            stats_add(&st, inputs[i], frd(r, R), ref);
+            ref = log10(actual_x);
+            stats_add(&st, actual_x, frd(r, R), q16(ref), log10(32000.0));
         }
-        acc_row("ln, log10", &st, "Via FR_MULK28 from log2");
+        acc_row("ln, log10", &st, "shift/add only for speed");
     }
 
     /* --- exp (FR_EXP) --- */
@@ -1995,13 +2171,14 @@ static void section_accuracy_table(void) {
         stats_t st; stats_reset(&st);
         for (int i = -400; i <= 400; i++) {
             double x = i / 100.0;
-            s32 fr = (s32)(x * scale);
+            s32 fr = tofix(x, R);
+            double actual_x = frd(fr, R);
             s32 r = FR_EXP(fr, R);
-            double ref = exp(x);
+            double ref = exp(actual_x);
             if (ref > 32000.0 || ref < 1e-6) continue; /* skip overflow/underflow */
-            stats_add(&st, x, frd(r, R), ref);
+            stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0);
         }
-        acc_row("exp", &st, "FR_MULK28 + FR_pow2");
+        acc_row("exp", &st, "shift/add only for speed");
     }
 
     /* --- exp_fast (FR_EXP_FAST) --- */
@@ -2009,11 +2186,12 @@ static void section_accuracy_table(void) {
         stats_t st; stats_reset(&st);
         for (int i = -400; i <= 400; i++) {
             double x = i / 100.0;
-            s32 fr = (s32)(x * scale);
+            s32 fr = tofix(x, R);
+            double actual_x = frd(fr, R);
             s32 r = FR_EXP_FAST(fr, R);
-            double ref = exp(x);
+            double ref = exp(actual_x);
             if (ref > 32000.0 || ref < 1e-6) continue;
-            stats_add(&st, x, frd(r, R), ref);
+            stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0);
         }
         acc_row("exp_fast", &st, "Shift-only scaling");
     }
@@ -2023,13 +2201,14 @@ static void section_accuracy_table(void) {
         stats_t st; stats_reset(&st);
         for (int i = -200; i <= 200; i++) {
             double x = i / 100.0;
-            s32 fr = (s32)(x * scale);
+            s32 fr = tofix(x, R);
+            double actual_x = frd(fr, R);
             s32 r = FR_POW10(fr, R);
-            double ref = pow(10.0, x);
+            double ref = pow(10.0, actual_x);
             if (ref > 32000.0 || ref < 1e-6) continue;
-            stats_add(&st, x, frd(r, R), ref);
+            stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0);
         }
-        acc_row("pow10", &st, "FR_MULK28 + FR_pow2");
+        acc_row("pow10", &st, "shift/add only for speed");
     }
 
     /* --- pow10_fast (FR_POW10_FAST) --- */
@@ -2037,11 +2216,12 @@ static void section_accuracy_table(void) {
         stats_t st; stats_reset(&st);
         for (int i = -200; i <= 200; i++) {
             double x = i / 100.0;
-            s32 fr = (s32)(x * scale);
+            s32 fr = tofix(x, R);
+            double actual_x = frd(fr, R);
             s32 r = FR_POW10_FAST(fr, R);
-            double ref = pow(10.0, x);
+            double ref = pow(10.0, actual_x);
             if (ref > 32000.0 || ref < 1e-6) continue;
-            stats_add(&st, x, frd(r, R), ref);
+            stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0);
         }
         acc_row("pow10_fast", &st, "Shift-only scaling");
     }
@@ -2054,13 +2234,14 @@ static void section_accuracy_table(void) {
             {1,1},{0.5,0.5},{100,100},{1000,1},{1,1000}
         };
         for (int i = 0; i < (int)(sizeof(cases)/sizeof(cases[0])); i++) {
-            s32 fx = (s32)(cases[i].x * scale);
-            s32 fy = (s32)(cases[i].y * scale);
+            s32 fx = tofix(cases[i].x, R);
+            s32 fy = tofix(cases[i].y, R);
+            double actual_x = frd(fx, R), actual_y = frd(fy, R);
             s32 r = FR_hypot(fx, fy, R);
-            double ref = hypot(cases[i].x, cases[i].y);
-            stats_add(&st, ref, frd(r, R), ref);
+            double ref = hypot(actual_x, actual_y);
+            stats_add(&st, ref, frd(r, R), q16(ref), hypot(1000.0, 1000.0));
         }
-        acc_row("hypot (exact)", &st, "64-bit intermediate");
+        acc_row("hypot (exact)", &st, "Uses 64-bit intermediate");
     }
 
     /* --- hypot_fast8 (8-seg) --- */
@@ -2071,16 +2252,186 @@ static void section_accuracy_table(void) {
             {100,100},{1000,1},{1,1000},{7,24},{20,21}
         };
         for (int i = 0; i < (int)(sizeof(cases)/sizeof(cases[0])); i++) {
-            s32 fx = (s32)(cases[i].x * scale);
-            s32 fy = (s32)(cases[i].y * scale);
+            s32 fx = tofix(cases[i].x, R);
+            s32 fy = tofix(cases[i].y, R);
+            double actual_x = frd(fx, R), actual_y = frd(fy, R);
             s32 r = FR_hypot_fast8(fx, fy);
-            double ref = hypot(cases[i].x, cases[i].y);
-            if (ref > 0) stats_add(&st, ref, frd(r, R), ref);
+            double ref = hypot(actual_x, actual_y);
+            if (ref > 0) stats_add(&st, ref, frd(r, R), q16(ref), hypot(1000.0, 1000.0));
         }
         acc_row("hypot_fast8 (8-seg)", &st, "Shift-only, no multiply");
     }
 
     printf("<!-- ACCURACY_TABLE_END -->\n");
+    printf("\n*Relative error; reference clamped to 1%% of full-scale output.\n\n");
+
+    /* ── Test-only rows (not library functions — conversion & pipeline checks) ── */
+    md_h3("14.0.1 Conversion & pipeline accuracy (test-only)");
+    printf("| Function | Max err (%%) | Avg err (%%) | Note |\n");
+    printf("|---|---:|---:|---|\n");
+
+    /* --- rad→BAM conversion (standalone: 65536-pt) --- */
+    {
+        stats_t &st = st_rad2bam;
+        for (int i = 0; i < 65536; i++) {
+            double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0);
+            s32 rad_fp = tofix(angle, R);
+            u16 got = fr_rad_to_bam(rad_fp, 16);
+            /* Exact BAM: wrap to u16 */
+            double exact_bam_d = angle * 65536.0 / (2.0 * M_PI);
+            s32 exact_bam_s = (s32)floor(exact_bam_d + 0.5);
+            u16 expected = (u16)(exact_bam_s & 0xFFFF);
+            /* Feed stats as degrees so the error is interpretable */
+            double got_deg = got * (360.0 / 65536.0);
+            double exp_deg = expected * (360.0 / 65536.0);
+            stats_add(&st, angle, got_deg, exp_deg, 360.0);
+        }
+        {
+            char note[128];
+            snprintf(note, sizeof(note),
+                     "fr_rad_to_bam() ±2π at r16; max %d BAM LSB",
+                     (int)(st.max_abs_err / (360.0 / 65536.0) + 0.5));
+            acc_row("rad→BAM conv", &st, note);
+        }
+    }
+
+    /* --- deg→BAM conversion (standalone: 65536-pt) --- */
+    {
+        stats_t &st = st_deg2bam;
+        for (int i = 0; i < 65536; i++) {
+            double deg = -360.0 + (720.0 * i / 65536.0);
+            s32 deg_fp = tofix(deg, R);
+            u16 got = fr_deg_to_bam(deg_fp, 16);
+            /* Exact BAM: wrap to u16 */
+            double exact_bam_d = deg * 65536.0 / 360.0;
+            s32 exact_bam_s = (s32)floor(exact_bam_d + 0.5);
+            u16 expected = (u16)(exact_bam_s & 0xFFFF);
+            double got_deg = got * (360.0 / 65536.0);
+            double exp_deg = expected * (360.0 / 65536.0);
+            stats_add(&st, deg, got_deg, exp_deg, 360.0);
+        }
+        {
+            char note[128];
+            snprintf(note, sizeof(note),
+                     "fr_deg_to_bam() ±360° at r16; max %d BAM LSB",
+                     (int)(st.max_abs_err / (360.0 / 65536.0) + 0.5));
+            acc_row("deg→BAM conv", &st, note);
+        }
+    }
+
+    /* --- sin / cos via integer degrees ±360° --- */
+    {
+        stats_t &st = st_sincos_deg_s32;
+        for (int deg = -360; deg <= 360; deg++) {
+            double rad = deg * M_PI / 180.0;
+            stats_add(&st, (double)deg, frd(FR_SinI(deg), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0);
+            stats_add(&st, (double)deg, frd(FR_CosI(deg), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0);
+        }
+        acc_row("sin/cos (int deg)", &st, "FR_SinI/FR_CosI ±360° integer degrees");
+    }
+
+    /* --- tan via integer degrees ±360° --- */
+    {
+        stats_t &st = st_tan_deg_s32;
+        for (int deg = -360; deg <= 360; deg++) {
+            double rad = deg * M_PI / 180.0;
+            stats_add(&st, (double)deg, frd(FR_TanI((s16)deg), FR_TRIG_OUT_PREC), q16(tan_ref(rad)), TAN_CLAMP);
+        }
+        acc_row("tan (int deg)", &st, "FR_TanI ±360° full; sat at poles");
+    }
+
+    /* --- Conversion macro accuracy (all 6 direction macros) --- */
+
+    /* FR_RAD2BAM macro: test within safe range (±pi at r16) */
+    {
+        stats_t st; stats_reset(&st);
+        for (int i = 0; i < 65536; i++) {
+            double angle = -M_PI + (2.0 * M_PI * i / 65536.0);
+            s32 rad_fp = tofix(angle, R);
+            s32 raw = FR_RAD2BAM(rad_fp);
+            u16 got = (u16)((raw + (1 << 15)) >> 16);
+            double exact_d = angle * 65536.0 / (2.0 * M_PI);
+            u16 expected = (u16)((s32)floor(exact_d + 0.5) & 0xFFFF);
+            double got_deg = got * (360.0 / 65536.0);
+            double exp_deg = expected * (360.0 / 65536.0);
+            stats_add(&st, angle, got_deg, exp_deg, 360.0);
+        }
+        acc_row("FR_RAD2BAM macro", &st, "Shift-approx ±π at r16; overflows beyond ±4 rad");
+    }
+
+    /* FR_DEG2BAM macro: test within safe range (±180° at r7) */
+    {
+        stats_t st; stats_reset(&st);
+        const u16 radix = 7;
+        for (int i = -23040; i <= 23040; i++) { /* ±180° at r7 = ±23040 */
+            double deg = (double)i / 128.0;
+            s32 raw = FR_DEG2BAM((s32)i);
+            u16 got = (u16)((raw + (1 << (radix - 1))) >> radix);
+            double exact_d = deg * 65536.0 / 360.0;
+            u16 expected = (u16)((s32)floor(exact_d + 0.5) & 0xFFFF);
+            double got_deg = got * (360.0 / 65536.0);
+            double exp_deg = expected * (360.0 / 65536.0);
+            stats_add(&st, deg, got_deg, exp_deg, 360.0);
+        }
+        acc_row("FR_DEG2BAM macro", &st, "Shift-approx ±180° at r7; overflows beyond ±256°");
+    }
+
+    /* FR_BAM2RAD macro: multiplies by 2π/65536 using shifts.
+     * BAM 0..32767 at r16 (upper half overflows s32 when <<16). */
+    {
+        stats_t st; stats_reset(&st);
+        for (int i = 0; i < 32768; i++) {
+            s32 bam_r16 = (s32)i << 16;
+            s32 rad_fp = FR_BAM2RAD(bam_r16);
+            double got_rad = frd(rad_fp, 16);
+            double exp_rad = (double)i * 2.0 * M_PI / 65536.0;
+            stats_add(&st, (double)i, got_rad, exp_rad, 2.0 * M_PI);
+        }
+        acc_row("FR_BAM2RAD macro", &st, "BAM→rad r16 full (0..32767; <<16 overflow above)");
+    }
+
+    /* FR_BAM2DEG macro: multiplies by 360/65536 using shifts.
+     * BAM 0..32767 at r16 (same s32 overflow limit). */
+    {
+        stats_t st; stats_reset(&st);
+        for (int i = 0; i < 32768; i++) {
+            s32 bam_r16 = (s32)i << 16;
+            s32 deg_fp = FR_BAM2DEG(bam_r16);
+            double got_deg = frd(deg_fp, 16);
+            double exp_deg = (double)i * 360.0 / 65536.0;
+            stats_add(&st, (double)i, got_deg, exp_deg, 360.0);
+        }
+        acc_row("FR_BAM2DEG macro", &st, "BAM→deg r16 full (0..32767; <<16 overflow above)");
+    }
+
+    /* FR_DEG2RAD macro: 65536-pt ±360° at r16 full */
+    {
+        stats_t st; stats_reset(&st);
+        for (int i = 0; i < 65536; i++) {
+            double deg = -360.0 + (720.0 * i / 65536.0);
+            s32 deg_fp = tofix(deg, R);
+            s32 rad_fp = FR_DEG2RAD(deg_fp);
+            double got_rad = frd(rad_fp, 16);
+            double exp_rad = deg * M_PI / 180.0;
+            stats_add(&st, deg, got_rad, exp_rad, 2.0 * M_PI);
+        }
+        acc_row("FR_DEG2RAD macro", &st, "65536-pt ±360° r16 full");
+    }
+
+    /* FR_RAD2DEG macro: 65536-pt ±2π at r16 full */
+    {
+        stats_t st; stats_reset(&st);
+        for (int i = 0; i < 65536; i++) {
+            double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0);
+            s32 rad_fp = tofix(angle, R);
+            s32 deg_fp = FR_RAD2DEG(rad_fp);
+            double got_deg = frd(deg_fp, 16);
+            double exp_deg = angle * 180.0 / M_PI;
+            stats_add(&st, angle, got_deg, exp_deg, 360.0);
+        }
+        acc_row("FR_RAD2DEG macro", &st, "65536-pt ±2π r16 full");
+    }
+
     printf("\n");
 
     /* Diagnostic: show where each trig function's worst % error occurs */
@@ -2092,10 +2443,14 @@ static void section_accuracy_table(void) {
     printf("|---|---|---:|---:|---:|---:|\n");
 
     struct { const char *name; stats_t *s; } diag[] = {
-        {"sin / cos", &st_sincos},
-        {"tan",       &st_tan},
-        {"asin/acos", &st_asincos},
-        {"atan2",     &st_atan2},
+        {"sin / cos",       &st_sincos},
+        {"tan",             &st_tan},
+        {"rad→BAM conv",    &st_rad2bam},
+        {"deg→BAM conv",    &st_deg2bam},
+        {"sin/cos (int deg)",&st_sincos_deg_s32},
+        {"tan (int deg)",   &st_tan_deg_s32},
+        {"asin/acos",       &st_asincos},
+        {"atan2",           &st_atan2},
     };
     for (int d = 0; d < (int)(sizeof(diag)/sizeof(diag[0])); d++) {
         stats_t *s = diag[d].s;
@@ -2106,6 +2461,325 @@ static void section_accuracy_table(void) {
                s->max_pct_err);
     }
     printf("\n");
+
+    /* ── 14.3 Per-function trig sweep table ────────────────────────────
+     * One row per public entry point. Each function is swept
+     * independently over its full domain so that peak abs / pct errors
+     * are attributable to a single function, not a combined aggregate.
+     *
+     * Peak pct err is raw |err|/|expected|*100 — no clamping.  Near
+     * zero crossings (sin≈0, cos≈0, asin(0)≈0) the denominator is
+     * tiny and pct blows up even when abs err is sub-LSB.  The Notes
+     * column flags these rows.  Use Peak abs err and Mean abs err to
+     * judge accuracy at zero crossings; use Peak pct err elsewhere.
+     */
+    md_h3("14.2 Neighborhoods (peak error ±10 samples)");
+
+    /* fr_sin radian at i=0 (-360°) — zero crossing neighborhood */
+    neighborhood("fr_sin radian @ -360 deg (i=0)", 3, 0, 10, 131072,
+                 -2.0 * M_PI, 2.0 * M_PI);
+
+    md_h3("14.3 Per-function trig sweep");
+
+    printf("| Function | Input | Range start | Range end | Points | Increment | "
+           "Peak abs err | @abs_err | Peak pct err | @pct_err | Expected | Got | Mean abs err | Notes |\n");
+    printf("|---|---|---:|---:|---:|---|---:|---:|---:|---:|---:|---:|---:|---|\n");
+
+    /* Helper: print one row of the per-function table */
+    #define SWEEP_ROW(name, sig, rlo, rhi, pts, step, st, note) \
+        printf("| %s | %s | %s | %s | %d | %s | %f | %.4f | %.4f%% | %.4f | %f | %f | %f | %s |\n", \
+            name, sig, rlo, rhi, pts, step, \
+            (st).max_abs_err, (st).worst_input, (st).max_pct_err, \
+            (st).worst_pct_input, (st).worst_pct_expected, (st).worst_pct_actual, \
+            stats_mean(&(st)), note)
+
+    /* fr_sin_bam */
+    {
+        stats_t st; stats_reset(&st);
+        for (int b = 0; b < 65536; b++) {
+            u16 bam = (u16)b;
+            double rad = bam * 2.0 * M_PI / 65536.0;
+            double deg = bam * 360.0 / 65536.0;
+            stats_add(&st, deg, frd(fr_sin_bam(bam), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0);
+        }
+        SWEEP_ROW("fr_sin_bam", "(u16 bam)", "0", "360", 65536, "0.0055 deg", st, "");
+    }
+    /* fr_cos_bam */
+    {
+        stats_t st; stats_reset(&st);
+        for (int b = 0; b < 65536; b++) {
+            u16 bam = (u16)b;
+            double rad = bam * 2.0 * M_PI / 65536.0;
+            double deg = bam * 360.0 / 65536.0;
+            stats_add(&st, deg, frd(fr_cos_bam(bam), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0);
+        }
+        SWEEP_ROW("fr_cos_bam", "(u16 bam)", "0", "360", 65536, "0.0055 deg", st, "");
+    }
+    /* fr_tan_bam */
+    {
+        stats_t st; stats_reset(&st);
+        for (int b = 0; b < 65536; b++) {
+            u16 bam = (u16)b;
+            double rad = bam * 2.0 * M_PI / 65536.0;
+            double deg = bam * 360.0 / 65536.0;
+            double ref;
+            if (bam == 16384)       ref = TAN_CLAMP;
+            else if (bam == 49152)  ref = -TAN_CLAMP;
+            else                    ref = q16(tan_ref(rad));
+            stats_add(&st, deg, frd(fr_tan_bam(bam), FR_TRIG_OUT_PREC), ref, TAN_CLAMP);
+        }
+        SWEEP_ROW("fr_tan_bam", "(u16 bam)", "0", "360", 65536, "0.0055 deg", st, "pole clamped");
+    }
+    /* fr_sin (radian) */
+    {
+        stats_t st; stats_reset(&st);
+        const int N2 = 131072;
+        for (int i = 0; i < N2; i++) {
+            double angle = -2.0 * M_PI + (4.0 * M_PI * i / (double)N2);
+            s32 rad_fp = tofix(angle, 16);
+            double actual_angle = frd(rad_fp, 16);
+            double deg = actual_angle * 180.0 / M_PI;
+            stats_add(&st, deg, frd(fr_sin(rad_fp, 16), FR_TRIG_OUT_PREC), q16(sin(actual_angle)), 1.0);
+        }
+        SWEEP_ROW("fr_sin", "(s32 rad, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "near-π small-angle bypass");
+    }
+    /* fr_cos (radian) */
+    {
+        stats_t st; stats_reset(&st);
+        const int N2 = 131072;
+        for (int i = 0; i < N2; i++) {
+            double angle = -2.0 * M_PI + (4.0 * M_PI * i / (double)N2);
+            s32 rad_fp = tofix(angle, 16);
+            double actual_angle = frd(rad_fp, 16);
+            double deg = actual_angle * 180.0 / M_PI;
+            stats_add(&st, deg, frd(fr_cos(rad_fp, 16), FR_TRIG_OUT_PREC), q16(cos(actual_angle)), 1.0);
+        }
+        SWEEP_ROW("fr_cos", "(s32 rad, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "");
+    }
+    /* fr_tan (radian) */
+    {
+        stats_t st; stats_reset(&st);
+        const int N2 = 131072;
+        for (int i = 0; i < N2; i++) {
+            double angle = -2.0 * M_PI + (4.0 * M_PI * i / (double)N2);
+            s32 rad_fp = tofix(angle, 16);
+            double actual_angle = frd(rad_fp, 16);
+            double deg = actual_angle * 180.0 / M_PI;
+            stats_add(&st, deg, frd(fr_tan(rad_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(actual_angle)), TAN_CLAMP);
+        }
+        SWEEP_ROW("fr_tan", "(s32 rad, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "sign extract + small-angle bypass at 0/pi/2pi; r24 cot(d)~1/d near poles; BAM table elsewhere");
+    }
+    /* FR_SinI */
+    {
+        stats_t st; stats_reset(&st);
+        for (int d = -360; d <= 360; d++) {
+            double rad = d * M_PI / 180.0;
+            stats_add(&st, (double)d, frd(FR_SinI(d), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0);
+        }
+        SWEEP_ROW("FR_SinI", "(s16 deg)", "-360", "+360", 721, "1 deg", st, "");
+    }
+    /* FR_CosI */
+    {
+        stats_t st; stats_reset(&st);
+        for (int d = -360; d <= 360; d++) {
+            double rad = d * M_PI / 180.0;
+            stats_add(&st, (double)d, frd(FR_CosI(d), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0);
+        }
+        SWEEP_ROW("FR_CosI", "(s16 deg)", "-360", "+360", 721, "1 deg", st, "");
+    }
+    /* FR_TanI */
+    {
+        stats_t st; stats_reset(&st);
+        for (int d = -360; d <= 360; d++) {
+            double rad = d * M_PI / 180.0;
+            double ref;
+            if (d % 180 == 90 || d % 180 == -90)
+                ref = (d > 0) ? TAN_CLAMP : -TAN_CLAMP;
+            else
+                ref = q16(tan_ref(rad));
+            stats_add(&st, (double)d, frd(FR_TanI((s16)d), FR_TRIG_OUT_PREC), ref, TAN_CLAMP);
+        }
+        SWEEP_ROW("FR_TanI", "(s16 deg)", "-360", "+360", 721, "1 deg", st, "pole clamped");
+    }
+    /* fr_sin_deg (fixed-radix degrees, radix 16) */
+    {
+        stats_t st; stats_reset(&st);
+        const int N2 = 131072;
+        for (int i = 0; i < N2; i++) {
+            double deg = -360.0 + 720.0 * i / (double)N2;
+            s32 deg_fp = tofix(deg, 16);
+            double actual_deg = frd(deg_fp, 16);
+            double rad = actual_deg * M_PI / 180.0;
+            stats_add(&st, actual_deg, frd(FR_Sin(deg_fp, 16), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0);
+        }
+        SWEEP_ROW("fr_sin_deg", "(s32 deg, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "pct peak at sin=0 crossing");
+    }
+    /* fr_cos_deg (fixed-radix degrees, radix 16) */
+    {
+        stats_t st; stats_reset(&st);
+        const int N2 = 131072;
+        for (int i = 0; i < N2; i++) {
+            double deg = -360.0 + 720.0 * i / (double)N2;
+            s32 deg_fp = tofix(deg, 16);
+            double actual_deg = frd(deg_fp, 16);
+            double rad = actual_deg * M_PI / 180.0;
+            stats_add(&st, actual_deg, frd(FR_Cos(deg_fp, 16), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0);
+        }
+        SWEEP_ROW("fr_cos_deg", "(s32 deg, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "near-90/270 small-angle bypass");
+    }
+    /* fr_tan_deg (fixed-radix degrees, radix 16) */
+    {
+        stats_t st; stats_reset(&st);
+        const int N2 = 131072;
+        for (int i = 0; i < N2; i++) {
+            double deg = -360.0 + 720.0 * i / (double)N2;
+            s32 deg_fp = tofix(deg, 16);
+            double actual_deg = frd(deg_fp, 16);
+            double rad = actual_deg * M_PI / 180.0;
+            stats_add(&st, actual_deg, frd(FR_Tan(deg_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(rad)), TAN_CLAMP);
+        }
+        SWEEP_ROW("fr_tan_deg", "(s32 deg, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "pct peak near tan pole");
+    }
+
+    /* --- Inverse Trig --- */
+
+    /* FR_acos */
+    {
+        stats_t st; stats_reset(&st);
+        const int N = 65537;
+        for (int i = 0; i < N; i++) {
+            double xd = -1.0 + 2.0 * i / (double)(N - 1);
+            s32 fr = tofix(xd, 15);
+            double actual_xd = frd(fr, 15);
+            s32 rad = FR_acos(fr, 15, 16);
+            stats_add(&st, actual_xd, frd(rad, 16), q16(acos(actual_xd)), M_PI);
+        }
+        SWEEP_ROW("FR_acos", "(s32,u16 15,u16 16)", "-1.0", "+1.0", N, "3.05e-5", st, "r15 in, r16 out");
+    }
+    /* FR_asin */
+    {
+        stats_t st; stats_reset(&st);
+        const int N = 65537;
+        for (int i = 0; i < N; i++) {
+            double xd = -1.0 + 2.0 * i / (double)(N - 1);
+            s32 fr = tofix(xd, 15);
+            double actual_xd = frd(fr, 15);
+            s32 rad = FR_asin(fr, 15, 16);
+            stats_add(&st, actual_xd, frd(rad, 16), q16(asin(actual_xd)), M_PI);
+        }
+        SWEEP_ROW("FR_asin", "(s32,u16 15,u16 16)", "-1.0", "+1.0", N, "3.05e-5", st, "r15 in, r16 out; pct peak at asin(0)=0");
+    }
+    /* FR_atan */
+    {
+        stats_t st; stats_reset(&st);
+        const int N = 131072;
+        for (int i = 0; i < N; i++) {
+            double xd = -10.0 + 20.0 * i / (double)N;
+            s32 fr = tofix(xd, 16);
+            double actual_xd = frd(fr, 16);
+            s32 rad = FR_atan(fr, 16, 16);
+            stats_add(&st, actual_xd, frd(rad, 16), q16(atan(actual_xd)), M_PI / 2.0);
+        }
+        SWEEP_ROW("FR_atan", "(s32,u16 16,u16 16)", "-10.0", "+10.0", N, "1.53e-4", st, "r16 in/out");
+    }
+    /* FR_atan2 — unit circle sweep */
+    {
+        stats_t st; stats_reset(&st);
+        const int N = 65536;
+        for (int i = 0; i < N; i++) {
+            double angle = -M_PI + 2.0 * M_PI * i / (double)N;
+            double deg = angle * 180.0 / M_PI;
+            s32 x = tofix(cos(angle), 15);
+            s32 y = tofix(sin(angle), 15);
+            s32 rad = FR_atan2(y, x, 16);
+            stats_add(&st, deg, frd(rad, 16), q16(atan2((double)y, (double)x)), M_PI);
+        }
+        SWEEP_ROW("FR_atan2", "(s32 y,s32 x,u16 16)", "-180", "+180", N, "0.0055 deg", st, "unit circle r15");
+    }
+
+    /* --- Log / Exp --- */
+
+    /* FR_log2 */
+    {
+        stats_t st; stats_reset(&st);
+        const int N = 65536;
+        for (int i = 1; i <= N; i++) {
+            double xd = 0.01 + (256.0 - 0.01) * i / (double)N;
+            s32 fr = tofix(xd, 16);
+            double actual_xd = frd(fr, 16);
+            s32 r = FR_log2(fr, 16, 16);
+            stats_add(&st, actual_xd, frd(r, 16), q16(log2(actual_xd)), log2(32000.0));
+        }
+        SWEEP_ROW("FR_log2", "(s32,u16 16,u16 16)", "0.01", "256", N, "0.0039", st, "r16 in/out");
+    }
+    /* FR_ln */
+    {
+        stats_t st; stats_reset(&st);
+        const int N = 65536;
+        for (int i = 1; i <= N; i++) {
+            double xd = 0.01 + (256.0 - 0.01) * i / (double)N;
+            s32 fr = tofix(xd, 16);
+            double actual_xd = frd(fr, 16);
+            s32 r = FR_ln(fr, 16, 16);
+            stats_add(&st, actual_xd, frd(r, 16), q16(log(actual_xd)), log(32000.0));
+        }
+        SWEEP_ROW("FR_ln", "(s32,u16 16,u16 16)", "0.01", "256", N, "0.0039", st, "r16 in/out");
+    }
+    /* FR_log10 */
+    {
+        stats_t st; stats_reset(&st);
+        const int N = 65536;
+        for (int i = 1; i <= N; i++) {
+            double xd = 0.01 + (256.0 - 0.01) * i / (double)N;
+            s32 fr = tofix(xd, 16);
+            double actual_xd = frd(fr, 16);
+            s32 r = FR_log10(fr, 16, 16);
+            stats_add(&st, actual_xd, frd(r, 16), q16(log10(actual_xd)), log10(32000.0));
+        }
+        SWEEP_ROW("FR_log10", "(s32,u16 16,u16 16)", "0.01", "256", N, "0.0039", st, "r16 in/out");
+    }
+    /* FR_pow2 */
+    {
+        stats_t st; stats_reset(&st);
+        const int N = 65536;
+        for (int i = 0; i < N; i++) {
+            double xd = -8.0 + 16.0 * i / (double)N;
+            s32 fr = tofix(xd, 16);
+            double actual_xd = frd(fr, 16);
+            s32 r = FR_pow2(fr, 16);
+            stats_add(&st, actual_xd, frd(r, 16), q16(pow(2.0, actual_xd)), pow(2.0, 8.0));
+        }
+        SWEEP_ROW("FR_pow2", "(s32,u16 16)", "-8.0", "+8.0", N, "2.44e-4", st, "r16 in/out");
+    }
+    /* FR_EXP (macro wrapping FR_pow2) */
+    {
+        stats_t st; stats_reset(&st);
+        const int N = 65536;
+        for (int i = 0; i < N; i++) {
+            double xd = -5.0 + 15.0 * i / (double)N;
+            s32 fr = tofix(xd, 16);
+            double actual_xd = frd(fr, 16);
+            s32 r = FR_EXP(fr, 16);
+            stats_add(&st, actual_xd, frd(r, 16), q16(exp(actual_xd)), 32000.0);
+        }
+        SWEEP_ROW("FR_EXP", "(s32,u16 16)", "-5.0", "+10.0", N, "2.29e-4", st, "macro, wraps FR_pow2");
+    }
+    /* FR_POW10 (macro wrapping FR_pow2) */
+    {
+        stats_t st; stats_reset(&st);
+        const int N = 65536;
+        for (int i = 0; i < N; i++) {
+            double xd = -2.0 + 6.0 * i / (double)N;
+            s32 fr = tofix(xd, 16);
+            double actual_xd = frd(fr, 16);
+            s32 r = FR_POW10(fr, 16);
+            stats_add(&st, actual_xd, frd(r, 16), q16(pow(10.0, actual_xd)), 32000.0);
+        }
+        SWEEP_ROW("FR_POW10", "(s32,u16 16)", "-2.0", "+4.0", N, "9.15e-5", st, "macro, wraps FR_pow2");
+    }
+
+    #undef SWEEP_ROW
+    printf("\n");
 }
 
 int main(void) {
@@ -2131,4 +2805,4 @@ int main(void) {
     section_accuracy_table();
 
     return 0;
-}
+}
\ No newline at end of file
diff --git a/tools/README.md b/tools/README.md
new file mode 100644
index 0000000..29d8ac7
--- /dev/null
+++ b/tools/README.md
@@ -0,0 +1,131 @@
+# FR_Math Tools
+
+Diagnostic and code-generation utilities for the FR_Math library.
+
+## trig_neighborhood
+
+Sweep a supported function over `2*half + 1` samples centred on `<center>` and
+print a table of raw output, expected reference, absolute error, and percent error.
+
+**Build:** `make tools`
+
+**Usage:**
+```
+trig_neighborhood <func> <center> <half> [options]
+```
+
+### Supported functions (25)
+
+| Category | Functions |
+|---|---|
+| Trig (degrees) | `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam`, `fr_sin`, `fr_cos`, `fr_tan`, `FR_SinI`, `FR_CosI`, `FR_TanI`, `fr_sin_deg`, `fr_cos_deg`, `fr_tan_deg` |
+| Inverse trig | `FR_acos`, `FR_asin`, `FR_atan`, `FR_atan2` |
+| Logarithmic | `FR_log2`, `FR_ln`, `FR_log10` |
+| Exponential | `FR_pow2`, `FR_EXP`, `FR_POW10` |
+| Other | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` |
+
+### Options
+
+| Option | Description | Default |
+|---|---|---|
+| `--inc <step>` | Increment per sample | function-dependent |
+| `--fmt md\|csv\|ascii` | Output format | `md` |
+| `--radix <r>` | Input radix for fixed-point | 16 |
+| `--out_radix <r>` | Output radix (inv trig, log) | 16 |
+| `--y <val>` | Fixed y for hypot functions | 0.0 |
+
+### Default increments
+
+- Trig + FR_atan2: `360/65536` (~0.0055 degrees, one 16-bit BAM step)
+- FR_acos, FR_asin: `1/32768` (~3.05e-5, one LSB at radix 15)
+- All others: `1/65536` (~1.53e-5, one LSB at radix 16)
+
+### Examples
+
+```bash
+# Cosine near -90 degrees
+build/trig_neighborhood fr_cos -90 15
+
+# Sine sweep in CSV format
+build/trig_neighborhood fr_sin -360 10 --fmt csv
+
+# Tangent near pole
+build/trig_neighborhood fr_tan 89.5 20 --inc 0.01
+
+# Arcsine near zero
+build/trig_neighborhood FR_asin 0.0001 15 --inc 3.05e-5 --radix 15
+
+# Log2 near 1.0
+build/trig_neighborhood FR_log2 1.0 15 --inc 0.01
+
+# Atan2 near 90 degrees
+build/trig_neighborhood FR_atan2 90 15
+
+# Hypot with y=50
+build/trig_neighborhood FR_hypot_fast8 100 15 --y 50 --radix 8
+```
+
+---
+
+## coef-gen.py
+
+Python script for generating power-of-two coefficient approximations. Given a
+target floating-point value, searches for combinations of `+/- 2^(-k)` terms
+that best approximate the value using only bit-shifts and adds.
+
+**Usage:** `python3 tools/coef-gen.py`
+
+---
+
+## fr_coef-gen.cpp
+
+C++ coefficient generator for 32-bit host. Similar purpose to `coef-gen.py`
+but runs natively and can be used for brute-force search over larger term
+counts.
+
+**Build:** `g++ -O2 tools/fr_coef-gen.cpp -o build/fr_coef-gen`
+
+---
+
+## gen_pow2_table.py
+
+Generates the `gFR_POW2_FRAC_TAB[65]` lookup table used by `FR_pow2()`.
+Output is a C array suitable for inclusion in FR_math.c.
+
+**Usage:** `python3 tools/gen_pow2_table.py`
+
+---
+
+## gen_radix28_constants.py
+
+Generates radix-28 constants used by FR_EXP, FR_ln, FR_log10 for base
+conversion (e.g., `FR_kLOG2E_28`, `FR_kLOG2_10_28`).
+
+**Usage:** `python3 tools/gen_radix28_constants.py`
+
+---
+
+## check_published_versions.sh
+
+Verifies that published version tags match the version defined in
+`FR_math.h` (`FR_MATH_VERSION_HEX`). Used in CI/release workflows.
+
+**Usage:** `bash tools/check_published_versions.sh`
+
+---
+
+## make_release.sh
+
+Release automation script. Bumps version, tags, and prepares release
+artifacts.
+
+**Usage:** `bash tools/make_release.sh`
+
+---
+
+## interp_analysis.html
+
+Interactive HTML/JS visualization for interpolation analysis. Open in a
+browser to explore interpolation error characteristics.
+
+**Usage:** Open `tools/interp_analysis.html` in a web browser.
diff --git a/tools/make_release.sh b/tools/make_release.sh
index 28f7647..0ef8adb 100755
--- a/tools/make_release.sh
+++ b/tools/make_release.sh
@@ -112,7 +112,10 @@ do_sync_version() {
         echo ""
         echo "  Running sync_version.sh to fix drift..."
         bash "${PROJECT_ROOT}/scripts/sync_version.sh"
-        git add -A
+        # Stage only the files sync_version.sh touches (not the whole tree).
+        git add src/FR_math.h VERSION README.md pages/version.json \
+                src/FR_math_2D.h src/FR_math_2D.cpp \
+                library.properties library.json idf_component.yml llms.txt
         pass "Version synced to $VER_STRING (changes staged)"
     else
         pass "All version strings match $VER_STRING"
@@ -149,7 +152,7 @@ do_validate() {
         grep -E "Failed: [1-9]" "${test_log}"
         fail "Test failures detected"
     fi
-    TOTAL_PASSED=$(grep -Eo "Passed: [0-9]+" "${test_log}" | awk -F: '{sum+=$2} END {print sum}')
+    TOTAL_PASSED=$(grep -Eo "Passed: [0-9]+" "${test_log}" | awk -F: '{sum+=$2} END {print sum+0}')
     pass "${TOTAL_PASSED} tests passed."
 
     echo ""
@@ -228,7 +231,7 @@ do_cross_compile() {
 
 # Files the pipeline itself may modify (badge update, version sync).
 # Anything outside this list is unexpected and should block the release.
-PIPELINE_FILES="README.md VERSION src/FR_math.h library.properties library.json idf_component.yml llms.txt pages/assets/site.js src/FR_math_2D.h src/FR_math_2D.cpp docs/README.md pages/index.html"
+PIPELINE_FILES="README.md VERSION src/FR_math.h library.properties library.json idf_component.yml llms.txt pages/version.json src/FR_math_2D.h src/FR_math_2D.cpp"
 
 do_commit_pipeline_changes() {
     step_header "Commit pipeline-generated changes"
@@ -634,8 +637,8 @@ do_switch_master() {
 do_verify_master() {
     step_header "Verify build on master"
 
-    run_cmd make clean >/dev/null 2>&1
-    run_cmd make test >/dev/null 2>&1
+    make clean >/dev/null 2>&1
+    make test >/dev/null 2>&1
     pass "All tests pass on master."
 }
 
diff --git a/tools/trig_neighborhood.cpp b/tools/trig_neighborhood.cpp
new file mode 100644
index 0000000..4c275d8
--- /dev/null
+++ b/tools/trig_neighborhood.cpp
@@ -0,0 +1,536 @@
+/*
+ * trig_neighborhood.cpp — sweep any math function over a range, print neighborhood table
+ *
+ * Usage:
+ *   trig_neighborhood <func> <center> <half> [--inc <step>] [--fmt md|csv|ascii]
+ *                     [--radix <r>] [--out_radix <r>] [--y <val>]
+ *
+ * Trig functions:
+ *   fr_sin_bam, fr_cos_bam, fr_tan_bam,
+ *   fr_sin, fr_cos, fr_tan,
+ *   FR_SinI, FR_CosI, FR_TanI,
+ *   fr_sin_deg, fr_cos_deg, fr_tan_deg
+ *
+ * Inverse trig:
+ *   FR_acos, FR_asin, FR_atan, FR_atan2
+ *
+ * Logarithmic:
+ *   FR_log2, FR_ln, FR_log10
+ *
+ * Exponential:
+ *   FR_pow2, FR_EXP, FR_POW10
+ *
+ * Other:
+ *   FR_sqrt, FR_hypot, FR_hypot_fast8
+ *
+ * center:       center value (degrees for trig/atan2, input value for others)
+ * half:         number of samples on each side of center
+ * --inc:        increment (default depends on function type)
+ * --fmt:        output format: md (default), csv, ascii
+ * --radix:      input radix for fixed-point functions (default: 16)
+ * --out_radix:  output radix for inverse trig and log (default: 16)
+ * --y:          fixed y value for FR_hypot / FR_hypot_fast8 (default: 0.0)
+ *
+ * Examples:
+ *   trig_neighborhood fr_cos -90 15
+ *   trig_neighborhood fr_sin -360 10 --fmt csv
+ *   trig_neighborhood fr_tan 89.5 20 --inc 0.01
+ *   trig_neighborhood fr_sin_deg 45 10 --radix 8
+ *   trig_neighborhood FR_asin 0.5 15 --radix 15 --out_radix 16
+ *   trig_neighborhood FR_log2 1.0 15 --inc 0.01
+ *   trig_neighborhood FR_atan2 90 15
+ *   trig_neighborhood FR_hypot_fast8 100 15 --y 50 --radix 8
+ *
+ * Build:
+ *   make tools
+ */
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cmath>
+#include "FR_math.h"
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+static double frd(s32 v, int p) { return (double)v / ldexp(1.0, p); }  /* raw fixed-point at radix p -> double */
+static double qN(double v, int p) { double s = ldexp(1.0, p); return floor(v * s + 0.5) / s; }  /* round v to the nearest multiple of 2^-p */
+/* double -> fixed-point at radix p, rounded to nearest via floor(x + 0.5) (not truncated) */
+static s32 tofix(double v, int p) { return (s32)floor(ldexp(v, p) + 0.5); }
+/* Magnitude limit for tangent references: 0x7fffffff read as a radix-16 value. */
+static const double TAN_CLAMP = (double)0x7fffffff / 65536.0;
+static double tan_ref(double rad) {
+    const double t = tan(rad);
+    /* Saturate to [-TAN_CLAMP, +TAN_CLAMP]; a NaN fails both tests and is returned as-is. */
+    return (t > TAN_CLAMP) ? TAN_CLAMP : (t < -TAN_CLAMP) ? -TAN_CLAMP : t;
+}
+
+enum Func {  /* Selector for the function under test; declaration order is significant */
+    F_SIN_BAM, F_COS_BAM, F_TAN_BAM,   /* trig ids must stay first: is_trig() tests f <= F_TAN_DEG */
+    F_SIN,     F_COS,     F_TAN,
+    F_SINI,    F_COSI,    F_TANI,
+    F_SIN_DEG, F_COS_DEG, F_TAN_DEG,   /* last trig id */
+    F_ACOS, F_ASIN, F_ATAN, F_ATAN2,   /* inverse trig */
+    F_LOG2, F_LN, F_LOG10,             /* logarithms */
+    F_POW2, F_EXP, F_POW10,            /* exponentials */
+    F_SQRT, F_HYPOT, F_HYPOT_FAST8,    /* square root / hypotenuse */
+    F_UNKNOWN                          /* returned by parse_func() for an unrecognised name */
+};
+
+enum Fmt { FMT_MD, FMT_CSV, FMT_ASCII };  /* output table format chosen with --fmt */
+
+/* Translate a command-line function name into its Func id. Matching is exact
+ * and case-sensitive; an unrecognised name yields F_UNKNOWN. */
+static Func parse_func(const char *s) {
+    static const struct { const char *name; Func id; } kTable[] = {
+        /* trig */
+        {"fr_sin_bam", F_SIN_BAM}, {"fr_cos_bam", F_COS_BAM}, {"fr_tan_bam", F_TAN_BAM},
+        {"fr_sin",     F_SIN},     {"fr_cos",     F_COS},     {"fr_tan",     F_TAN},
+        {"FR_SinI",    F_SINI},    {"FR_CosI",    F_COSI},    {"FR_TanI",    F_TANI},
+        {"fr_sin_deg", F_SIN_DEG}, {"fr_cos_deg", F_COS_DEG}, {"fr_tan_deg", F_TAN_DEG},
+        /* inverse trig, log, exp, other */
+        {"FR_acos",        F_ACOS},
+        {"FR_asin",        F_ASIN},
+        {"FR_atan",        F_ATAN},
+        {"FR_atan2",       F_ATAN2},
+        {"FR_log2",        F_LOG2},
+        {"FR_ln",          F_LN},
+        {"FR_log10",       F_LOG10},
+        {"FR_pow2",        F_POW2},
+        {"FR_EXP",         F_EXP},
+        {"FR_POW10",       F_POW10},
+        {"FR_sqrt",        F_SQRT},
+        {"FR_hypot",       F_HYPOT},
+        {"FR_hypot_fast8", F_HYPOT_FAST8},
+    };
+    for (unsigned i = 0; i < sizeof kTable / sizeof kTable[0]; i++)
+        if (strcmp(s, kTable[i].name) == 0) return kTable[i].id;
+    return F_UNKNOWN;
+}
+
+/* Inverse of parse_func(): the canonical name for a Func id, or "?" for
+ * F_UNKNOWN and for any value that has no name. */
+static const char *func_name(Func f) {
+    /* Indexed directly by Func value: the order must mirror enum Func. */
+    static const char *const kNames[] = {
+        /* trig */
+        "fr_sin_bam", "fr_cos_bam", "fr_tan_bam",
+        "fr_sin",     "fr_cos",     "fr_tan",
+        "FR_SinI",    "FR_CosI",    "FR_TanI",
+        "fr_sin_deg", "fr_cos_deg", "fr_tan_deg",
+        /* inverse trig, log, exp, other */
+        "FR_acos",
+        "FR_asin",
+        "FR_atan",
+        "FR_atan2",
+        "FR_log2",
+        "FR_ln",
+        "FR_log10",
+        "FR_pow2",
+        "FR_EXP",
+        "FR_POW10",
+        "FR_sqrt",
+        "FR_hypot",
+        "FR_hypot_fast8",
+    };
+    const int idx = (int)f;
+    if (idx < 0 || idx >= (int)(sizeof kNames / sizeof kNames[0]))
+        return "?";
+    return kNames[idx];
+}
+
+static int is_sin(Func f) { return f == F_SIN_BAM || f == F_SIN || f == F_SINI || f == F_SIN_DEG; }  /* any sine variant */
+static int is_cos(Func f) { return f == F_COS_BAM || f == F_COS || f == F_COSI || f == F_COS_DEG; }  /* any cosine variant */
+static int is_trig(Func f) { return f <= F_TAN_DEG; }  /* relies on the trig ids being declared first in enum Func */
+
+/* Convert degrees to a 16-bit binary angle (65536 units per turn), rounding to
+ * nearest. floor() is needed because a bare (int) cast truncates toward zero,
+ * which put negative angles one unit high (-90 deg gave 49153, not 49152). */
+static u16 deg_to_bam(double deg) { return (u16)((int)floor(deg * 65536.0 / 360.0 + 0.5) & 0xFFFF); }
+
+/* Evaluate function f at val (degrees for trig/atan2, plain value otherwise)
+ * and return the raw fixed-point result. Outputs: *input_fp = fixed-point form
+ * of the input, *expected = double reference rounded to the output radix,
+ * *out_prec = radix of both the return value and *expected. */
+static s32 eval(Func f, double val, int radix, int out_radix,
+                double y_val, s32 *input_fp, double *expected, int *out_prec)
+{
+    s32 raw = 0;
+
+    /* --- Trig functions (val = degrees) --- */
+    if (is_trig(f)) {
+        double rad = val * M_PI / 180.0;
+        *out_prec = 16;
+
+        if (is_sin(f)) *expected = qN(sin(rad), 16);
+        else if (is_cos(f)) *expected = qN(cos(rad), 16);
+        else *expected = qN(tan_ref(rad), 16);
+
+        switch (f) {
+        case F_SIN_BAM:
+        case F_COS_BAM:
+        case F_TAN_BAM: {
+            u16 bam = deg_to_bam(val);
+            *input_fp = (s32)bam;
+            if (f == F_SIN_BAM)      raw = fr_sin_bam(bam);
+            else if (f == F_COS_BAM) raw = fr_cos_bam(bam);
+            else                     raw = fr_tan_bam(bam);
+            break;
+        }
+        case F_SIN: {
+            s32 rad_fp = tofix(rad, radix);
+            *input_fp = rad_fp;
+            double actual_rad = frd(rad_fp, radix);
+            *expected = qN(sin(actual_rad), 16);
+            raw = fr_sin(rad_fp, (u16)radix);
+            break;
+        }
+        case F_COS: {
+            s32 rad_fp = tofix(rad, radix);
+            *input_fp = rad_fp;
+            double actual_rad = frd(rad_fp, radix);
+            *expected = qN(cos(actual_rad), 16);
+            raw = fr_cos(rad_fp, (u16)radix);
+            break;
+        }
+        case F_TAN: {
+            s32 rad_fp = tofix(rad, radix);
+            *input_fp = rad_fp;
+            double actual_rad = frd(rad_fp, radix);
+            *expected = qN(tan_ref(actual_rad), 16);
+            raw = fr_tan(rad_fp, (u16)radix);
+            break;
+        }
+        case F_SINI:
+            *input_fp = (s32)(int)val;
+            raw = FR_SinI((int)val);
+            break;
+        case F_COSI:
+            *input_fp = (s32)(int)val;
+            raw = FR_CosI((int)val);
+            break;
+        case F_TANI:
+            *input_fp = (s32)(int)val;
+            raw = FR_TanI((s16)(int)val);
+            break;
+        case F_SIN_DEG: {
+            s32 deg_fp = tofix(val, radix);
+            *input_fp = deg_fp;
+            double actual_deg = frd(deg_fp, radix);
+            double actual_rad2 = actual_deg * M_PI / 180.0;
+            /* Reference is taken at the quantized angle actually passed in, so
+             * input rounding is not counted as function error. */
+            *expected = qN(sin(actual_rad2), 16);
+            raw = fr_sin_deg(deg_fp, (u16)radix);
+            break;
+        }
+        case F_COS_DEG: {
+            s32 deg_fp = tofix(val, radix);
+            *input_fp = deg_fp;
+            double actual_deg = frd(deg_fp, radix);
+            double actual_rad2 = actual_deg * M_PI / 180.0;
+            *expected = qN(cos(actual_rad2), 16);
+            raw = fr_cos_deg(deg_fp, (u16)radix);
+            break;
+        }
+        case F_TAN_DEG: {
+            s32 deg_fp = tofix(val, radix);
+            *input_fp = deg_fp;
+            double actual_deg = frd(deg_fp, radix);
+            double actual_rad2 = actual_deg * M_PI / 180.0;
+            *expected = qN(tan_ref(actual_rad2), 16);
+            raw = fr_tan_deg(deg_fp, (u16)radix);
+            break;
+        }
+        default:
+            break;
+        }
+        return raw;
+    }
+
+    /* --- Inverse trig (val = input value, not degrees) --- */
+    if (f == F_ACOS || f == F_ASIN || f == F_ATAN) {
+        *out_prec = out_radix;
+        s32 inp = tofix(val, radix);
+        *input_fp = inp;
+
+        switch (f) {
+        case F_ACOS:
+            raw = FR_acos(inp, (u16)radix, (u16)out_radix);
+            *expected = qN(acos(val), out_radix);
+            break;
+        case F_ASIN:
+            raw = FR_asin(inp, (u16)radix, (u16)out_radix);
+            *expected = qN(asin(val), out_radix);
+            break;
+        case F_ATAN:
+            raw = FR_atan(inp, (u16)radix, (u16)out_radix);
+            *expected = qN(atan(val), out_radix);
+            break;
+        default:
+            break;
+        }
+        return raw;
+    }
+
+    /* --- FR_atan2 (val = degrees on unit circle) --- */
+    if (f == F_ATAN2) {
+        *out_prec = out_radix;
+        double rad = val * M_PI / 180.0;
+        s32 x = tofix(cos(rad), 15);
+        s32 y = tofix(sin(rad), 15);
+        *input_fp = tofix(val, radix);
+        raw = FR_atan2(y, x, (u16)out_radix);
+        double ref = atan2((double)y, (double)x);
+        *expected = qN(ref, out_radix);
+        return raw;
+    }
+
+    /* --- Log functions (val = input value) --- */
+    if (f == F_LOG2 || f == F_LN || f == F_LOG10) {
+        *out_prec = out_radix;
+        s32 inp = tofix(val, radix);
+        *input_fp = inp;
+
+        switch (f) {
+        case F_LOG2:
+            raw = FR_log2(inp, (u16)radix, (u16)out_radix);
+            *expected = (val > 0.0) ? qN(log2(val), out_radix) : 0.0;
+            break;
+        case F_LN:
+            raw = FR_ln(inp, (u16)radix, (u16)out_radix);
+            *expected = (val > 0.0) ? qN(log(val), out_radix) : 0.0;
+            break;
+        case F_LOG10:
+            raw = FR_log10(inp, (u16)radix, (u16)out_radix);
+            *expected = (val > 0.0) ? qN(log10(val), out_radix) : 0.0;
+            break;
+        default:
+            break;
+        }
+        return raw;
+    }
+
+    /* --- Power/exp functions (val = exponent) --- */
+    if (f == F_POW2 || f == F_EXP || f == F_POW10) {
+        *out_prec = radix;
+        s32 inp = tofix(val, radix);
+        *input_fp = inp;
+
+        switch (f) {
+        case F_POW2:
+            raw = FR_pow2(inp, (u16)radix);
+            *expected = qN(pow(2.0, val), radix);
+            break;
+        case F_EXP:
+            raw = FR_EXP(inp, (u16)radix);
+            *expected = qN(exp(val), radix);
+            break;
+        case F_POW10:
+            raw = FR_POW10(inp, (u16)radix);
+            *expected = qN(pow(10.0, val), radix);
+            break;
+        default:
+            break;
+        }
+        return raw;
+    }
+
+    /* --- FR_sqrt (val = input value) --- */
+    if (f == F_SQRT) {
+        *out_prec = radix;
+        s32 inp = tofix(val, radix);
+        *input_fp = inp;
+        raw = FR_sqrt(inp, (u16)radix);
+        *expected = (val >= 0.0) ? qN(sqrt(val), radix) : 0.0;
+        return raw;
+    }
+
+    /* --- FR_hypot / FR_hypot_fast8 (val = x, y_val = y) --- */
+    if (f == F_HYPOT || f == F_HYPOT_FAST8) {
+        *out_prec = radix;
+        s32 x_fp = tofix(val, radix);
+        s32 y_fp = tofix(y_val, radix);
+        *input_fp = x_fp;
+
+        if (f == F_HYPOT)
+            raw = FR_hypot(x_fp, y_fp, (u16)radix);
+        else
+            raw = FR_hypot_fast8(x_fp, y_fp);
+
+        *expected = qN(hypot(val, y_val), radix);
+        return raw;
+    }
+
+    /* fallback */
+    *input_fp = 0;
+    *expected = 0.0;
+    *out_prec = 16;
+    return 0;
+}
+
+/* Step used when --inc is absent: degrees for angle inputs, one LSB otherwise. */
+static double default_inc(Func f) {
+    const int angle_input = (f == F_ATAN2) || is_trig(f);
+    const int unit_range  = (f == F_ASIN) || (f == F_ACOS);
+    return angle_input ? 360.0 / 65536.0   /* ~0.0055 degrees */
+         : unit_range  ? 1.0 / 32768.0     /* ~3.05e-5, matches r15 LSB */
+         :               1.0 / 65536.0;    /* ~1.53e-5, matches r16 LSB */
+}
+
+static void usage(void) {  /* Print the command-line help text to stderr. */
+    static const char kHelp[] =
+        "Usage: trig_neighborhood <func> <center> <half> [options]\n"
+        "\n"
+        "Supported functions:\n"
+        "\n"
+        "  Trig (input: degrees):\n"
+        "    fr_sin_bam, fr_cos_bam, fr_tan_bam\n"
+        "    fr_sin, fr_cos, fr_tan\n"
+        "    FR_SinI, FR_CosI, FR_TanI\n"
+        "    fr_sin_deg, fr_cos_deg, fr_tan_deg\n"
+        "\n"
+        "  Inverse trig (input: value):\n"
+        "    FR_acos, FR_asin, FR_atan\n"
+        "\n"
+        "  Inverse trig (input: degrees on unit circle):\n"
+        "    FR_atan2\n"
+        "\n"
+        "  Logarithmic (input: value):\n"
+        "    FR_log2, FR_ln, FR_log10\n"
+        "\n"
+        "  Exponential (input: exponent):\n"
+        "    FR_pow2, FR_EXP, FR_POW10\n"
+        "\n"
+        "  Other:\n"
+        "    FR_sqrt (input: value)\n"
+        "    FR_hypot, FR_hypot_fast8 (input: x, --y for y)\n"
+        "\n"
+        "  center:  center of sweep (degrees for trig/atan2, value otherwise)\n"
+        "  half:    number of samples each side of center\n"
+        "\n"
+        "Options:\n"
+        "  --inc <step>         increment (default depends on function)\n"
+        "  --fmt md|csv|ascii   output format (default: md)\n"
+        "  --radix <r>          input radix for fixed-point (default: 16)\n"
+        "  --out_radix <r>      output radix for inv trig/log (default: 16)\n"
+        "  --y <val>            fixed y value for hypot functions (default: 0.0)\n"
+        "\n"
+        "Examples:\n"
+        "  trig_neighborhood fr_cos -90 15\n"
+        "  trig_neighborhood fr_sin -360 10 --fmt csv\n"
+        "  trig_neighborhood fr_tan 89.5 20 --inc 0.01\n"
+        "  trig_neighborhood fr_sin_deg 45 10 --radix 8\n"
+        "  trig_neighborhood FR_asin 0.5 15 --radix 15 --out_radix 16\n"
+        "  trig_neighborhood FR_log2 1.0 15 --inc 0.01\n"
+        "  trig_neighborhood FR_atan2 90 15\n"
+        "  trig_neighborhood FR_hypot_fast8 100 15 --y 50 --radix 8\n";
+    fputs(kHelp, stderr);
+}
+
+/* Entry point: parse <func> <center> <half> plus options, print the table header,
+ * then one row per sample k = -half..half at center + k*inc. Returns 1 on bad usage. */
+int main(int argc, char **argv) {
+    if (argc < 4) { usage(); return 1; }
+
+    Func func = parse_func(argv[1]);
+    if (func == F_UNKNOWN) {
+        fprintf(stderr, "Unknown function: %s\n", argv[1]);
+        usage();
+        return 1;
+    }
+
+    double center = atof(argv[2]);
+    int half = atoi(argv[3]);
+    double inc = -1.0;  /* sentinel: use default */
+    Fmt fmt = FMT_MD;
+    int radix = 16, out_radix = 16;
+    double y_val = 0.0;
+
+    for (int i = 4; i < argc; i++) {
+        if (!strcmp(argv[i], "--inc") && i + 1 < argc)
+            inc = atof(argv[++i]);
+        else if (!strcmp(argv[i], "--fmt") && i + 1 < argc) {
+            i++;
+            if (!strcmp(argv[i], "csv"))   fmt = FMT_CSV;
+            else if (!strcmp(argv[i], "ascii")) fmt = FMT_ASCII;
+            else fmt = FMT_MD;
+        }
+        else if (!strcmp(argv[i], "--radix") && i + 1 < argc)
+            radix = atoi(argv[++i]);
+        else if (!strcmp(argv[i], "--out_radix") && i + 1 < argc)
+            out_radix = atoi(argv[++i]);
+        else if (!strcmp(argv[i], "--y") && i + 1 < argc)
+            y_val = atof(argv[++i]);
+        else  /* was dropped silently, which hid typos and options missing a value */
+            fprintf(stderr, "Warning: ignoring unknown or incomplete option: %s\n", argv[i]);
+    }
+
+    /* Radices are cast to u16 and scale s32 values, so negative or >31 values are meaningless. */
+    if (radix < 0 || radix > 31 || out_radix < 0 || out_radix > 31) {
+        fprintf(stderr, "--radix and --out_radix must be between 0 and 31\n");
+        return 1;
+    }
+    if (inc < 0.0) inc = default_inc(func);
+
+    const char *cols[] = {"sample", "val", "input_fp", "radix", "raw_got", "raw_exp", "expected", "got", "abs_err", "pct_err"};
+    int ncols = 10;
+
+    switch (fmt) {
+    case FMT_CSV:
+        for (int c = 0; c < ncols; c++)
+            printf("%s%s", cols[c], c < ncols - 1 ? "," : "\n");
+        break;
+    case FMT_MD:
+        printf("**%s** center=%.6f, +/-%d samples, inc=%.6g, radix=%d",
+               func_name(func), center, half, inc, radix);
+        if (out_radix != radix) printf(", out_radix=%d", out_radix);
+        if (func == F_HYPOT || func == F_HYPOT_FAST8) printf(", y=%.6f", y_val);
+        printf("\n\n|");
+        for (int c = 0; c < ncols; c++) printf(" %s |", cols[c]);
+        printf("\n|");
+        for (int c = 0; c < ncols; c++) printf("---|");
+        printf("\n");
+        break;
+    case FMT_ASCII:
+        printf("# %s  center=%.6f  +/-%d  inc=%.6g  radix=%d",
+               func_name(func), center, half, inc, radix);
+        if (out_radix != radix) printf("  out_radix=%d", out_radix);
+        if (func == F_HYPOT || func == F_HYPOT_FAST8) printf("  y=%.6f", y_val);
+        printf("\n");
+        printf("%8s %12s %12s %6s %10s %10s %12s %12s %12s %12s\n",
+               cols[0], cols[1], cols[2], cols[3], cols[4], cols[5], cols[6], cols[7], cols[8], cols[9]);
+        printf("%8s %12s %12s %6s %10s %10s %12s %12s %12s %12s\n",
+               "--------", "------------", "------------", "------", "----------", "----------",
+               "------------", "------------", "------------", "------------");
+        break;
+    }
+
+    for (int k = -half; k <= half; k++) {
+        double val = center + k * inc;
+        s32 input_fp; double expected; int out_prec;  /* outputs of eval() */
+        s32 raw = eval(func, val, radix, out_radix, y_val, &input_fp, &expected, &out_prec);
+        s32 raw_exp = (s32)floor(ldexp(expected, out_prec) + 0.5);
+        double got = frd(raw, out_prec);
+        double ae = fabs(got - expected);
+        double pe = (expected != 0.0) ? ae / fabs(expected) * 100.0 : (ae != 0.0 ? 100.0 : 0.0);
+
+        switch (fmt) {
+        case FMT_CSV:
+            printf("%d,%.6g,%d,%d,%d,%d,%.6f,%.6f,%.6f,%.4f%%\n",
+                   k, val, input_fp, radix, raw, raw_exp, expected, got, ae, pe);
+            break;
+        case FMT_MD:
+            printf("| %d | %.6g | %d | %d | %d | %d | %.6f | %.6f | %.6f | %.4f%% |\n",
+                   k, val, input_fp, radix, raw, raw_exp, expected, got, ae, pe);
+            break;
+        case FMT_ASCII:
+            printf("%8d %12.6g %12d %6d %10d %10d %12.6f %12.6f %12.6f %11.4f%%\n",
+                   k, val, input_fp, radix, raw, raw_exp, expected, got, ae, pe);
+            break;
+        }
+    }
+
+    return 0;
+}