From 518093cc57c9bd0771bd07e673d5940e78cec8b3 Mon Sep 17 00:00:00 2001
From: deftio
Date: Wed, 29 Apr 2026 19:33:28 -0700
Subject: [PATCH 1/7] updated bam fixes for sin/cos
---
README.md | 16 +-
compare_lfm/comparison_results.json | 479 ----------------------------
docker/build_sizes_compare.sh | 174 ++++++++++
docker/size_detail.sh | 102 ++++++
docs/README.md | 48 +--
docs/api-reference.md | 12 +-
docs/building.md | 8 +-
docs/examples.md | 6 +-
docs/fixed-point-primer.md | 30 +-
docs/getting-started.md | 2 +-
docs/releases.md | 23 +-
llms.txt | 3 +-
pages/guide/api-reference.html | 14 +-
pages/guide/building.html | 8 +-
pages/guide/examples.html | 6 +-
pages/guide/fixed-point-primer.html | 32 +-
pages/guide/getting-started.html | 2 +-
pages/index.html | 52 +--
pages/releases.html | 12 +-
release_notes.md | 46 +++
src/FR_math.c | 198 +++++++-----
src/FR_math.h | 172 ++++++----
src/FR_tan32.c | 282 ++++++++++++++++
src/FR_tan_table.h | 115 +++++++
src/FR_trig_table.h | 37 ++-
tests/test_full_coverage.c | 93 +++++-
tests/test_full_sweep.c | 346 ++++++++++++++++++++
tests/test_pole_table.c | 92 ++++++
tests/test_sweep_csv.c | 149 +++++++++
tests/test_tan32.c | 424 ++++++++++++++++++++++++
tests/test_tan32_peaks.c | 198 ++++++++++++
tests/test_tan32_sweep.c | 318 ++++++++++++++++++
tests/test_tdd.cpp | 323 ++++++++++++++++---
33 files changed, 3023 insertions(+), 799 deletions(-)
delete mode 100644 compare_lfm/comparison_results.json
create mode 100755 docker/build_sizes_compare.sh
create mode 100755 docker/size_detail.sh
create mode 100644 src/FR_tan32.c
create mode 100644 src/FR_tan_table.h
create mode 100644 tests/test_full_sweep.c
create mode 100644 tests/test_pole_table.c
create mode 100644 tests/test_sweep_csv.c
create mode 100644 tests/test_tan32.c
create mode 100644 tests/test_tan32_peaks.c
create mode 100644 tests/test_tan32_sweep.c
diff --git a/README.md b/README.md
index 42982fc..357bd5b 100644
--- a/README.md
+++ b/README.md
@@ -35,11 +35,15 @@ number of fractional bits available. All functions support radix 0 to 30.
| Function | Max err (%) | Avg err (%) | Note |
|---|---:|---:|---|
-| sin / cos | 0.7169 | 0.0100 | 65536-pt sweep + specials |
-| tan | 0.7118 | 0.0162 | 65536-pt sweep (skip poles) |
-| asin / acos | 0.7025 | 0.0105 | 65536-pt; sqrt approx near boundary |
-| atan2 | 0.4953 | 0.0268 | 65536x5 radii; asin/acos+hypot_fast8 |
-| atan | 0.2985 | 0.0159 | 20001-pt sweep [-10,10]; via FR_atan2 |
+| sin/cos (BAM) | 0.1646 | 0.0058 | 65536 BAM; 129-entry quadrant table |
+| sin/cos (deg) | 0.5909 | 0.0091 | 65536-pt deg r7 + specials |
+| sin/cos (rad) | 0.1646 | 0.0059 | 65536-pt rad r16 |
+| tan (BAM) | 0.1704 | 0.0065 | 65536 BAM; 65-entry octant table |
+| tan (deg) | 0.6000 | 0.0140 | 65536-pt deg r7 + specials |
+| tan (rad) | 0.1704 | 0.0065 | 65536-pt rad r16 |
+| asin / acos | 1.9776 | 0.0308 | 65536-pt; sqrt approx near boundary |
+| atan2 | 0.4953 | 0.0238 | 65536x5 radii; asin/acos+hypot_fast8 |
+| atan | 0.2985 | 0.0153 | 20001-pt sweep [-10,10]; via FR_atan2 |
| sqrt | 0.0003 | 0.0000 | Round-to-nearest |
| log2 | 0.2479 | 0.0045 | 65-entry mantissa table |
| pow2 | 0.1373 | 0.0057 | 65-entry fraction table |
@@ -59,7 +63,7 @@ number of fractional bits available. All functions support radix 0 to 30.
| Arithmetic | `FR_ADD`, `FR_SUB`, `FR_DIV`, `FR_DIV32`, `FR_MOD`, `FR_FixMuls`, `FR_FixMulSat`, `FR_CHRDX` |
| Utility | `FR_MIN`, `FR_MAX`, `FR_CLAMP`, `FR_ABS`, `FR_SGN` |
| Trig (integer deg) | `FR_Sin`, `FR_Cos`, `FR_Tan`, `FR_SinI`, `FR_CosI`, `FR_TanI` |
-| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_sin_deg`, `fr_cos_deg` |
+| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam`, `fr_sin_deg`, `fr_cos_deg` |
| Inverse trig | `FR_atan`, `FR_atan2`, `FR_asin`, `FR_acos` |
| Log / exp | `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`, `FR_EXP_FAST`, `FR_POW10_FAST`, `FR_MULK28` |
| Roots | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` |
diff --git a/compare_lfm/comparison_results.json b/compare_lfm/comparison_results.json
deleted file mode 100644
index adf0019..0000000
--- a/compare_lfm/comparison_results.json
+++ /dev/null
@@ -1,479 +0,0 @@
-{
- "description": "FR_math vs libfixmath benchmark — both measured against math.h double precision (IEEE 754)",
- "gold_standard": " IEEE 754 double precision (~15 significant digits)",
- "fixed_point_format": "Q16.16 (s15.16), 1 LSB = 1.52587890625000e-05",
- "accuracy_points": 65536,
- "timing_iterations": 100000,
- "rel_error_threshold": 0.01,
- "platform": "macOS ARM (Apple Silicon)",
- "optimization": "-O2",
- "results": [
- {
- "function": "sin",
- "double_reference": "std::sin",
- "sweep": "65536-pt, [-pi, +pi]",
- "speed": {
- "fr_math_ns_per_call": 2.6,
- "libfixmath_ns_per_call": 20.7,
- "fr_math_speedup": 7.94,
- "faster": "fr_math"
- },
- "accuracy_vs_double": {
- "fr_math": {
- "max_abs_error": 1.34165039e-04,
- "mean_abs_error": 4.23947344e-05,
- "max_error_lsb": 8.8,
- "mean_error_lsb": 2.8,
- "max_rel_error_pct": 1.0615,
- "mean_rel_error_pct": 0.0158
- },
- "libfixmath": {
- "max_abs_error": 7.74511497e-03,
- "mean_abs_error": 5.34549003e-04,
- "max_error_lsb": 507.6,
- "mean_error_lsb": 35.0,
- "max_rel_error_pct": 74.5513,
- "mean_rel_error_pct": 0.6105
- },
- "closer_to_double": "fr_math"
- }
- },
- {
- "function": "cos",
- "double_reference": "std::cos",
- "sweep": "65536-pt, [-pi, +pi]",
- "speed": {
- "fr_math_ns_per_call": 4.8,
- "libfixmath_ns_per_call": 18.4,
- "fr_math_speedup": 3.86,
- "faster": "fr_math"
- },
- "accuracy_vs_double": {
- "fr_math": {
- "max_abs_error": 1.25349009e-04,
- "mean_abs_error": 4.65658208e-05,
- "max_error_lsb": 8.2,
- "mean_error_lsb": 3.1,
- "max_rel_error_pct": 0.9018,
- "mean_rel_error_pct": 0.0161
- },
- "libfixmath": {
- "max_abs_error": 7.75591931e-03,
- "mean_abs_error": 5.36939114e-04,
- "max_error_lsb": 508.3,
- "mean_error_lsb": 35.2,
- "max_rel_error_pct": 74.4001,
- "mean_rel_error_pct": 0.6121
- },
- "closer_to_double": "fr_math"
- }
- },
- {
- "function": "tan",
- "double_reference": "std::tan",
- "sweep": "65536-pt, [-1.2, 1.2] rad",
- "speed": {
- "fr_math_ns_per_call": 6.0,
- "libfixmath_ns_per_call": 41.4,
- "fr_math_speedup": 6.89,
- "faster": "fr_math"
- },
- "accuracy_vs_double": {
- "fr_math": {
- "max_abs_error": 8.49384425e-04,
- "mean_abs_error": 1.04510886e-04,
- "max_error_lsb": 55.7,
- "mean_error_lsb": 6.8,
- "max_rel_error_pct": 1.0080,
- "mean_rel_error_pct": 0.0228
- },
- "libfixmath": {
- "max_abs_error": 1.82495961e-02,
- "mean_abs_error": 8.01092905e-04,
- "max_error_lsb": 1196.0,
- "mean_error_lsb": 52.5,
- "max_rel_error_pct": 0.7099,
- "mean_rel_error_pct": 0.0410
- },
- "closer_to_double": "fr_math"
- },
- "note": "Skip near pi/2"
- },
- {
- "function": "asin",
- "double_reference": "std::asin",
- "sweep": "65536-pt, [-0.999, 0.999]",
- "speed": {
- "fr_math_ns_per_call": 11.5,
- "libfixmath_ns_per_call": 53.7,
- "fr_math_speedup": 4.67,
- "faster": "fr_math"
- },
- "accuracy_vs_double": {
- "fr_math": {
- "max_abs_error": 4.76933520e-04,
- "mean_abs_error": 4.37641042e-05,
- "max_error_lsb": 31.3,
- "mean_error_lsb": 2.9,
- "max_rel_error_pct": 0.5795,
- "mean_rel_error_pct": 0.0134
- },
- "libfixmath": {
- "max_abs_error": 1.01788963e-02,
- "mean_abs_error": 3.64421558e-03,
- "max_error_lsb": 667.1,
- "mean_error_lsb": 238.8,
- "max_rel_error_pct": 20.1233,
- "mean_rel_error_pct": 2.4452
- },
- "closer_to_double": "fr_math"
- }
- },
- {
- "function": "acos",
- "double_reference": "std::acos",
- "sweep": "65536-pt, [-0.999, 0.999]",
- "speed": {
- "fr_math_ns_per_call": 8.4,
- "libfixmath_ns_per_call": 50.4,
- "fr_math_speedup": 5.97,
- "faster": "fr_math"
- },
- "accuracy_vs_double": {
- "fr_math": {
- "max_abs_error": 4.72479065e-04,
- "mean_abs_error": 4.33857475e-05,
- "max_error_lsb": 31.0,
- "mean_error_lsb": 2.8,
- "max_rel_error_pct": 0.5194,
- "mean_rel_error_pct": 0.0056
- },
- "libfixmath": {
- "max_abs_error": 1.01897006e-02,
- "mean_abs_error": 3.64422377e-03,
- "max_error_lsb": 667.8,
- "mean_error_lsb": 238.8,
- "max_rel_error_pct": 15.3142,
- "mean_rel_error_pct": 0.3475
- },
- "closer_to_double": "fr_math"
- }
- },
- {
- "function": "atan",
- "double_reference": "std::atan",
- "sweep": "65536-pt, [-50, 50]",
- "speed": {
- "fr_math_ns_per_call": 8.0,
- "libfixmath_ns_per_call": 11.2,
- "fr_math_speedup": 1.41,
- "faster": "fr_math"
- },
- "accuracy_vs_double": {
- "fr_math": {
- "max_abs_error": 9.57408985e-04,
- "mean_abs_error": 7.37662492e-05,
- "max_error_lsb": 62.7,
- "mean_error_lsb": 4.8,
- "max_rel_error_pct": 0.2149,
- "mean_rel_error_pct": 0.0061
- },
- "libfixmath": {
- "max_abs_error": 1.01676134e-02,
- "mean_abs_error": 6.15802358e-03,
- "max_error_lsb": 666.3,
- "mean_error_lsb": 403.6,
- "max_rel_error_pct": 19.8632,
- "mean_rel_error_pct": 0.4571
- },
- "closer_to_double": "fr_math"
- }
- },
- {
- "function": "atan2",
- "double_reference": "std::atan2",
- "sweep": "65536-pt, 5 radii x 360 deg",
- "speed": {
- "fr_math_ns_per_call": 15.9,
- "libfixmath_ns_per_call": 10.5,
- "fr_math_speedup": 0.66,
- "faster": "libfixmath"
- },
- "accuracy_vs_double": {
- "fr_math": {
- "max_abs_error": 9.70679332e-04,
- "mean_abs_error": 2.15170870e-04,
- "max_error_lsb": 63.6,
- "mean_error_lsb": 14.1,
- "max_rel_error_pct": 0.4122,
- "mean_rel_error_pct": 0.0258
- },
- "libfixmath": {
- "max_abs_error": 1.01728729e-02,
- "mean_abs_error": 3.88005371e-03,
- "max_error_lsb": 666.7,
- "mean_error_lsb": 254.3,
- "max_rel_error_pct": 20.0045,
- "mean_rel_error_pct": 0.9267
- },
- "closer_to_double": "fr_math"
- },
- "note": "All 4 quadrants"
- },
- {
- "function": "sqrt",
- "double_reference": "std::sqrt",
- "sweep": "65536-pt, [0.01, 100]",
- "speed": {
- "fr_math_ns_per_call": 18.6,
- "libfixmath_ns_per_call": 19.8,
- "fr_math_speedup": 1.06,
- "faster": "fr_math"
- },
- "accuracy_vs_double": {
- "fr_math": {
- "max_abs_error": 7.62924903e-06,
- "mean_abs_error": 3.80582266e-06,
- "max_error_lsb": 0.5,
- "mean_error_lsb": 0.2,
- "max_rel_error_pct": 0.0062,
- "mean_rel_error_pct": 0.0001
- },
- "libfixmath": {
- "max_abs_error": 7.62924903e-06,
- "mean_abs_error": 3.80582266e-06,
- "max_error_lsb": 0.5,
- "mean_error_lsb": 0.2,
- "max_rel_error_pct": 0.0062,
- "mean_rel_error_pct": 0.0001
- },
- "closer_to_double": "tie"
- }
- },
- {
- "function": "exp",
- "double_reference": "std::exp",
- "sweep": "65536-pt, [-5, 5]",
- "speed": {
- "fr_math_ns_per_call": 3.1,
- "libfixmath_ns_per_call": 67.6,
- "fr_math_speedup": 22.02,
- "faster": "fr_math"
- },
- "accuracy_vs_double": {
- "fr_math": {
- "max_abs_error": 3.17909587e-03,
- "mean_abs_error": 1.03218909e-04,
- "max_error_lsb": 208.3,
- "mean_error_lsb": 6.8,
- "max_rel_error_pct": 0.1486,
- "mean_rel_error_pct": 0.0078
- },
- "libfixmath": {
- "max_abs_error": 3.30095957e-03,
- "mean_abs_error": 9.38398029e-05,
- "max_error_lsb": 216.3,
- "mean_error_lsb": 6.1,
- "max_rel_error_pct": 0.0756,
- "mean_rel_error_pct": 0.0042
- },
- "closer_to_double": "fr_math"
- }
- },
- {
- "function": "ln",
- "double_reference": "std::log",
- "sweep": "65536-pt, [0.01, 100]",
- "speed": {
- "fr_math_ns_per_call": 8.8,
- "libfixmath_ns_per_call": 479.3,
- "fr_math_speedup": 54.70,
- "faster": "fr_math"
- },
- "accuracy_vs_double": {
- "fr_math": {
- "max_abs_error": 4.93278555e-05,
- "mean_abs_error": 1.61117669e-05,
- "max_error_lsb": 3.2,
- "mean_error_lsb": 1.1,
- "max_rel_error_pct": 0.3012,
- "mean_rel_error_pct": 0.0006
- },
- "libfixmath": {
- "max_abs_error": 3.40447818e-05,
- "mean_abs_error": 5.14211182e-06,
- "max_error_lsb": 2.2,
- "mean_error_lsb": 0.3,
- "max_rel_error_pct": 0.0557,
- "mean_rel_error_pct": 0.0002
- },
- "closer_to_double": "libfixmath"
- }
- },
- {
- "function": "log2",
- "double_reference": "std::log2",
- "sweep": "65536-pt, [0.01, 100]",
- "speed": {
- "fr_math_ns_per_call": 8.7,
- "libfixmath_ns_per_call": 39.4,
- "fr_math_speedup": 4.55,
- "faster": "fr_math"
- },
- "accuracy_vs_double": {
- "fr_math": {
- "max_abs_error": 6.06739329e-05,
- "mean_abs_error": 2.30368713e-05,
- "max_error_lsb": 4.0,
- "mean_error_lsb": 1.5,
- "max_rel_error_pct": 0.4945,
- "mean_rel_error_pct": 0.0006
- },
- "libfixmath": {
- "max_abs_error": 3.56826644e-05,
- "mean_abs_error": 9.96190621e-06,
- "max_error_lsb": 2.3,
- "mean_error_lsb": 0.7,
- "max_rel_error_pct": 0.1758,
- "mean_rel_error_pct": 0.0002
- },
- "closer_to_double": "libfixmath"
- }
- },
- {
- "function": "mul",
- "double_reference": "double a*b",
- "sweep": "65536-pt, a in [-50,50], b in [-2,2]",
- "speed": {
- "fr_math_ns_per_call": 0.9,
- "libfixmath_ns_per_call": 1.2,
- "fr_math_speedup": 1.33,
- "faster": "fr_math"
- },
- "accuracy_vs_double": {
- "fr_math": {
- "max_abs_error": 7.62939453e-06,
- "mean_abs_error": 3.81535541e-06,
- "max_error_lsb": 0.5,
- "mean_error_lsb": 0.3,
- "max_rel_error_pct": 0.0692,
- "mean_rel_error_pct": 0.0004
- },
- "libfixmath": {
- "max_abs_error": 7.62939453e-06,
- "mean_abs_error": 3.81535541e-06,
- "max_error_lsb": 0.5,
- "mean_error_lsb": 0.3,
- "max_rel_error_pct": 0.0692,
- "mean_rel_error_pct": 0.0004
- },
- "closer_to_double": "tie"
- }
- },
- {
- "function": "div",
- "double_reference": "double a/b",
- "sweep": "65536-pt, a/b in [-50,50]/[0.5,50]",
- "speed": {
- "fr_math_ns_per_call": 0.9,
- "libfixmath_ns_per_call": 5.2,
- "fr_math_speedup": 5.98,
- "faster": "fr_math"
- },
- "accuracy_vs_double": {
- "fr_math": {
- "max_abs_error": 7.62927377e-06,
- "mean_abs_error": 3.82182808e-06,
- "max_error_lsb": 0.5,
- "mean_error_lsb": 0.3,
- "max_rel_error_pct": 0.0727,
- "mean_rel_error_pct": 0.0010
- },
- "libfixmath": {
- "max_abs_error": 8.37162948e-06,
- "mean_abs_error": 3.82625614e-06,
- "max_error_lsb": 0.5,
- "mean_error_lsb": 0.3,
- "max_rel_error_pct": 0.0727,
- "mean_rel_error_pct": 0.0010
- },
- "closer_to_double": "fr_math"
- },
- "note": "Both use 64-bit intermediate"
- },
- {
- "function": "hypot",
- "double_reference": "std::hypot",
- "sweep": "65536-pt, 5 radii x 360 deg",
- "speed": {
- "fr_math_ns_per_call": 20.0
- },
- "accuracy_vs_double": {
- "fr_math": {
- "max_abs_error": 7.62930188e-06,
- "mean_abs_error": 3.67171926e-06,
- "max_error_lsb": 0.5,
- "mean_error_lsb": 0.2,
- "max_rel_error_pct": 0.0076,
- "mean_rel_error_pct": 0.0009
- }
- },
- "note": "FR_math only (libfixmath has no hypot)"
- },
- {
- "function": "hypot_fast8",
- "double_reference": "std::hypot",
- "sweep": "65536-pt, 5 radii x 360 deg",
- "speed": {
- "fr_math_ns_per_call": 2.4
- },
- "accuracy_vs_double": {
- "fr_math": {
- "max_abs_error": 1.37244198e+00,
- "mean_abs_error": 1.13634634e-01,
- "max_error_lsb": 89944.4,
- "mean_error_lsb": 7447.2,
- "max_rel_error_pct": 0.1372,
- "mean_rel_error_pct": 0.0516
- }
- },
- "note": "FR_math only; shift-only, no multiply"
- }
- ],
- "summary": {
- "head_to_head_functions": 13,
- "faster_wins": { "fr_math": 12, "libfixmath": 1 },
- "accuracy_wins": { "fr_math": 9, "libfixmath": 2, "tie": 2 },
- "total_functions_tested": 15
- },
- "notes": [
- "All accuracy measured vs IEEE 754 double. Lower = closer to perfect.",
- "LSB = Q16.16 least-significant-bit = 1.53e-5. Best possible = 0.5 LSB.",
- "Percent errors skip |ref| < 0.01 to avoid near-zero division spikes.",
- "Both libraries use Q16.16 (s15.16): 1.0 = 65536.",
- "FR_math trig: BAM + 129-entry LUT + linear interpolation.",
- "libfixmath trig: parabolic approximation + 5th-order correction.",
- "Timing: min of 3 passes x 100000 calls; cache-warm.",
- "Speedup > 1.0 means FR_math is faster by that factor."
- ],
- "compiled_size_note": "Run 'make size' in .compare/ for live numbers. The values below are representative.",
- "compiled_size": {
- "compiler": "clang -O2 (macOS ARM)",
- "fr_math": {
- "files": "FR_math.c (single file)",
- "functions": "trig(6), inv-trig(4), log/ln/log10, exp/pow2/pow10, exp_fast/pow10_fast, sqrt, hypot(2), waves(6), ADSR(4), print(4), format",
- "rom_bytes": 7470,
- "ram_bss_bytes": 0,
- "note": "All tables in const ROM. Zero runtime allocation."
- },
- "libfixmath": {
- "files": "fix16.c, fix16_sqrt.c, fix16_exp.c, fix16_trig.c, fix16_str.c, uint32.c, fract32.c",
- "functions": "trig(6), inv-trig(4), log/log2, exp, sqrt, mul/div, str",
- "rom_bytes": 4912,
- "ram_bss_bytes": 114688,
- "rom_bytes_no_cache": 5476,
- "ram_bss_bytes_no_cache": 0,
- "note": "Default mode caches 112 KB of sin/exp LUTs in BSS. FIXMATH_NO_CACHE eliminates RAM but recomputes per call."
- }
- }
-}
diff --git a/docker/build_sizes_compare.sh b/docker/build_sizes_compare.sh
new file mode 100755
index 0000000..940de5f
--- /dev/null
+++ b/docker/build_sizes_compare.sh
@@ -0,0 +1,174 @@
+#!/usr/bin/env bash
+#
+# build_sizes_compare.sh — cross-compile FR_math.c with and without FR_tan32.c
+# for every supported target, and report the size delta.
+#
+# Run inside the Docker container:
+# docker run --rm -v $(pwd):/src fr-math-sizes bash /src/docker/build_sizes_compare.sh
+
+set -euo pipefail
+
+SRC_OLD="/src/src/FR_math.c"
+SRC_NEW="/src/src/FR_tan32.c"
+INC="-I/src/src"
+OUT="/src/build/size_compare"
+
+mkdir -p "${OUT}"
+
+# ── helpers ────────────────────────────────────────────────────────────
+
+# get_text_size
+# Compiles source(s) to .o files, sums .text sections.
+get_text_size() {
+ local label="$1"; shift
+ local cc="$1"; shift
+ local sz_cmd="$1"; shift
+ local flags="$1"; shift
+ # remaining args are source files
+
+ if ! command -v "${cc}" >/dev/null 2>&1; then
+ echo "n/a"
+ return
+ fi
+
+ local total=0
+ for src in "$@"; do
+ local bname
+ bname=$(basename "${src}" .c)
+ local obj="${OUT}/${label}_${bname}.o"
+ if ! ${cc} ${flags} ${INC} -std=c99 -Wall -Os -ffreestanding \
+ -c "${src}" -o "${obj}" 2>/dev/null; then
+ echo "fail"
+ return
+ fi
+ local text
+ text=$(${sz_cmd} --format=berkeley "${obj}" 2>/dev/null | tail -1 | awk '{print $1}')
+ total=$((total + text))
+ done
+ echo "${total}"
+}
+
+# resolve_size_tool: given a compiler path, find the matching size binary
+resolve_size_tool() {
+ local cc="$1"
+ local prefix="${cc%-gcc*}"
+ prefix="${prefix%-gcc-*}"
+ if [[ "${prefix}" != "${cc}" ]] && command -v "${prefix}-size" >/dev/null 2>&1; then
+ echo "${prefix}-size"
+ else
+ echo "size"
+ fi
+}
+
+# ── target definitions ────────────────────────────────────────────────
+
+declare -a T_NAMES T_CCS T_SZ T_FLAGS
+
+add() {  # usage: add NAME CC FLAGS -- appends one row to the parallel T_* arrays
+    local name="$1" compiler="$2" cflags="$3"
+    T_NAMES+=("${name}")
+    T_CCS+=("${compiler}")
+    T_SZ+=("$(resolve_size_tool "${compiler}")"); T_FLAGS+=("${cflags}")
+}
+
+# ARM
+add "RP2040 (Cortex-M0+)" arm-none-eabi-gcc "-mcpu=cortex-m0plus -mthumb"
+add "STM32 (Cortex-M4)" arm-none-eabi-gcc "-mcpu=cortex-m4 -mthumb -mfloat-abi=soft"
+add "Cortex-M0 (Thumb-1)" arm-none-eabi-gcc "-mcpu=cortex-m0 -mthumb"
+
+# RISC-V
+add "RISC-V 32 (rv32im)" riscv64-unknown-elf-gcc "-march=rv32im -mabi=ilp32"
+
+# Xtensa (ESP32)
+add "ESP32 (Xtensa)" xtensa-esp-elf-gcc ""
+
+# 68k
+add "68k" m68k-linux-gnu-gcc-12 ""
+
+# x86
+add "x86-32" gcc "-m32"
+add "x86-64" gcc "-m64"
+
+# MSP430 (16-bit, no stdint)
+add "MSP430" msp430-elf-gcc "-mmcu=msp430f5529 -DFR_NO_STDINT"
+
+# 68HC11 (8-bit)
+add "68HC11" m68hc11-gcc "-DFR_NO_STDINT"
+
+# ── compile ────────────────────────────────────────────────────────────
+
+echo ""
+echo "FR_Math cross-platform size comparison: OLD vs OLD+NEW tan32"
+echo "Date: $(date -u '+%Y-%m-%d %H:%M UTC')"
+echo ""
+
+declare -a R_OLD R_NEW   # per-target .text totals, index-aligned with T_NAMES
+
+# Build every target twice: SRC_OLD alone, then SRC_OLD together with SRC_NEW.
+for idx in "${!T_NAMES[@]}"; do
+    label="${T_NAMES[$idx]}"
+    cc="${T_CCS[$idx]}"
+    sz="${T_SZ[$idx]}"
+    flags="${T_FLAGS[$idx]}"
+    # Object-file tag: the label with ' ', '(', ')' and '/' turned into '_'.
+    tag=$(tr ' ()/' '____' <<< "${label}")
+
+    old=$(get_text_size "${tag}_old" "${cc}" "${sz}" "${flags}" "${SRC_OLD}")
+    new=$(get_text_size "${tag}_new" "${cc}" "${sz}" "${flags}" "${SRC_OLD}" "${SRC_NEW}")
+    R_OLD+=("${old}")
+    R_NEW+=("${new}")
+
+    echo " ${label}: old=${old} old+new=${new}"
+done
+
+# ── output table ───────────────────────────────────────────────────────
+
+echo ""
+echo "## FR_Math size: Old vs Old + 32-bit LUT tan (\`-Os -ffreestanding\`)"
+echo ""
+printf "| %-26s | %10s | %10s | %10s | %6s |\n" "Target" "Old (text)" "w/ tan32" "Delta" "Delta%"
+printf "| %-26s | %10s | %10s | %10s | %6s |\n" "--------------------------" "----------" "----------" "----------" "------"
+
+# One table row per target; rows without two numeric sizes print dashes instead.
+for idx in "${!T_NAMES[@]}"; do
+    old="${R_OLD[$idx]}"
+    new="${R_NEW[$idx]}"
+    if ! [[ "${old}" =~ ^[0-9]+$ && "${new}" =~ ^[0-9]+$ ]]; then
+        printf "| %-26s | %10s | %10s | %10s | %6s |\n" \
+            "${T_NAMES[$idx]}" "${old}" "${new}" "—" "—"
+        continue
+    fi
+    delta=$((new - old))
+    pct=$(awk -v d="${delta}" -v o="${old}" 'BEGIN { printf "%.1f", 100.0*d/o }')
+    printf "| %-26s | %8s B | %8s B | %+8d B | %5s%% |\n" \
+        "${T_NAMES[$idx]}" "${old}" "${new}" "${delta}" "${pct}"
+done
+
+echo ""
+echo "Old = FR_math.c only (contains existing tan/atan)."
+echo "w/ tan32 = FR_math.c + FR_tan32.c (adds new 32-bit LUT tan/atan alongside old)."
+echo "Delta = additional bytes from FR_tan32.c (new functions + 129-entry u32 table)."
+echo ""
+
+# ── per-function breakdown (x86-64) ───────────────────────────────────
+
+echo "### Per-function breakdown (x86-64, GCC -Os)"
+echo ""
+
+obj_old="${OUT}/x86-64_old_FR_math.o"    # tag keeps the '-': tr maps only ' ()/'
+obj_new="${OUT}/x86-64_new_FR_tan32.o"
+# Skipped silently when the x86-64 build above did not produce both objects.
+if [[ -f "${obj_old}" && -f "${obj_new}" ]]; then
+    echo "**Old tan/atan in FR_math.o:**"
+    echo '```'
+    nm "${obj_old}" -n -S --size-sort -f sysv -t d 2>/dev/null | grep -iE "tan|atan" || true
+    echo '```'
+    echo ""
+    echo "**New in FR_tan32.o:**"
+    echo '```'
+    nm "${obj_new}" -n -S --size-sort -f sysv -t d 2>/dev/null | grep -E "FUNC" || true
+    echo '```'
+fi
+
+echo ""
+echo "Done."
diff --git a/docker/size_detail.sh b/docker/size_detail.sh
new file mode 100755
index 0000000..19677e5
--- /dev/null
+++ b/docker/size_detail.sh
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+INC="-I/src/src"
+FLAGS="-std=c99 -Wall -Os -ffreestanding"
+OUT=/tmp/sz
+mkdir -p "${OUT}"
+
+# do_platform LABEL CC FLAGS: print one table row comparing the old tan/atan
+# code in FR_math.c with FR_tan32.c for one target. A target whose compiler
+# is missing or whose build fails is skipped without printing a row.
+do_platform() {
+    local label="$1" cc="$2" flags="$3"
+
+    if ! command -v "${cc}" >/dev/null 2>&1; then
+        return 0
+    fi
+
+    # Prefer the cross toolchain's own size/nm when CC is "<prefix>-gcc[...]".
+    local sz_cmd="size" nm_cmd="nm" prefix="${cc%-gcc*}"
+    if [ "${prefix}" != "${cc}" ]; then
+        command -v "${prefix}-size" >/dev/null 2>&1 && sz_cmd="${prefix}-size"
+        command -v "${prefix}-nm" >/dev/null 2>&1 && nm_cmd="${prefix}-nm"
+    fi
+
+    # "return 0" matters: a bare return would hand the compiler's failure
+    # status to the caller, and "set -e" would then abort the whole report.
+    ${cc} ${FLAGS} ${flags} ${INC} -c /src/src/FR_math.c -o "${OUT}/old.o" 2>/dev/null || return 0
+    ${cc} ${FLAGS} ${flags} ${INC} -c /src/src/FR_tan32.c -o "${OUT}/new.o" 2>/dev/null || return 0
+
+    local old_text new_text
+    old_text=$(${sz_cmd} --format=berkeley "${OUT}/old.o" | tail -1 | awk '{print $1}')
+    new_text=$(${sz_cmd} --format=berkeley "${OUT}/new.o" | tail -1 | awk '{print $1}')
+
+    # Sum the sizes nm -S reports (in hex) for old text symbols named *tan*.
+    local old_tan_total=0 addr size typ name
+    while IFS=' ' read -r addr size typ name; do
+        # Only add a well-formed hex size; anything else would break 16#...
+        if [[ "${size}" =~ ^[0-9a-fA-F]+$ ]]; then
+            old_tan_total=$((old_tan_total + 16#${size}))
+        fi
+    done < <(${nm_cmd} -n -S --defined-only "${OUT}/old.o" 2>/dev/null \
+        | grep -E " [tT] " | grep -iE "tan|atan" || true)
+
+    local replace_delta=$((new_text - old_tan_total))
+    local new_total=$((old_text - old_tan_total + new_text))
+
+    printf "| %-26s | %6s | %6s | %6s | %6s | %+6d |\n" \
+        "${label}" "${old_text}" "${old_tan_total}" "${new_text}" "${new_total}" "${replace_delta}"
+
+    rm -f "${OUT}/old.o" "${OUT}/new.o"
+}
+
+echo ""
+echo "## FR_Math: Old vs Replacement size (new tan32 replaces old tan/atan)"
+echo ""
+printf "| %-26s | %6s | %6s | %6s | %6s | %6s |\n" \
+ "Target" "Old" "OldT/A" "New" "Repl" "Delta"
+printf "| %-26s | %6s | %6s | %6s | %6s | %6s |\n" \
+ "--------------------------" "------" "------" "------" "------" "------"
+
+do_platform "RP2040 (Cortex-M0+)" arm-none-eabi-gcc "-mcpu=cortex-m0plus -mthumb"
+do_platform "STM32 (Cortex-M4)" arm-none-eabi-gcc "-mcpu=cortex-m4 -mthumb -mfloat-abi=soft"
+do_platform "Cortex-M0 (Thumb-1)" arm-none-eabi-gcc "-mcpu=cortex-m0 -mthumb"
+do_platform "RISC-V 32 (rv32im)" riscv64-unknown-elf-gcc "-march=rv32im -mabi=ilp32"
+do_platform "ESP32 (Xtensa)" xtensa-esp-elf-gcc ""
+do_platform "68k" m68k-linux-gnu-gcc-12 ""
+do_platform "x86-32" gcc "-m32"
+do_platform "x86-64" gcc "-m64"
+do_platform "MSP430" msp430-elf-gcc "-mmcu=msp430f5529 -DFR_NO_STDINT"
+
+echo ""
+echo "Old = FR_math.c total .text"
+echo "OldT/A = old tan+atan functions within FR_math.o (would be removed)"
+echo "New = FR_tan32.c total .text (replacement functions + 129-entry u32 table)"
+echo "Repl = library size after replacement (Old - OldT/A + New)"
+echo "Delta = New - OldT/A (net change from replacement)"
+
+# === x86-64 per-function detail ===
+echo ""
+echo "### x86-64 per-function detail"
+echo ""
+
+gcc ${FLAGS} -m64 ${INC} -c /src/src/FR_math.c -o "${OUT}/old.o" 2>/dev/null
+gcc ${FLAGS} -m64 ${INC} -c /src/src/FR_tan32.c -o "${OUT}/new.o" 2>/dev/null
+
+echo "**Old tan/atan functions in FR_math.o:**"
+echo '```'
+nm -n -S --defined-only "${OUT}/old.o" | grep -E " [tT] " | grep -iE "tan|atan" | \
+while IFS=' ' read -r addr size typ name; do
+ printf " %-30s %d bytes\n" "${name}" "$((16#${size}))"
+done
+echo '```'
+
+echo ""
+echo "**New functions in FR_tan32.o:**"
+echo '```'
+nm -n -S --defined-only "${OUT}/new.o" | grep -E " [tT] " | \
+while IFS=' ' read -r addr size typ name; do
+ printf " %-30s %d bytes\n" "${name}" "$((16#${size}))"
+done
+echo '```'
diff --git a/docs/README.md b/docs/README.md
index 0e1dd1a..c88b451 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -16,7 +16,7 @@ into a single format.
Tested on gcc, clang, MSVC, IAR, Keil, sdcc, AVR-gcc, MSP430-gcc,
RISC-V toolchains, and Arduino.
- Zero dependencies beyond `<stdint.h>`.
-- Parameterised radix: every function takes the binary point as an
+- Parameterized radix: every function takes the binary point as an
argument, so you choose how many fractional bits you need per call.
- Deterministic, bounded error — every public symbol has a documented
worst case in the [API reference](api-reference.md).
@@ -33,7 +33,7 @@ or any tooling. If you want the browser version, look in
| --- | --- |
| [getting-started.md](getting-started.md) | Clone, build, run your first FR_Math program. |
| [fixed-point-primer.md](fixed-point-primer.md) | Why fixed-point exists, sM.N notation, operations, how to pick a radix. |
-| [api-reference.md](api-reference.md) | Every public symbol: signature, radix, precision, error behaviour. |
+| [api-reference.md](api-reference.md) | Every public symbol: signature, radix, precision, error behavior. |
| [examples.md](examples.md) | Runnable snippets: trig, log, waves, ADSR, 2D transforms. |
| [building.md](building.md) | Makefile, scripts, test suite, coverage, cross-compilation. |
| [releases.md](releases.md) | Release history with per-version highlights and breaking changes. |
@@ -45,24 +45,28 @@ radix — Q16.16 is just the reference point for the table. See the
[TDD report](../build/test_tdd_report.md) for sweeps at radixes 8, 12,
16, and 24. Percent errors skip expected values near zero (|expected| < 0.01).
-
-| Function | Max err (%) | Avg err (%) | Note |
-|---|---:|---:|---|
-| sin / cos | 0.7169 | 0.0100 | 65536-pt sweep + specials |
-| tan | 0.7118 | 0.0162 | 65536-pt sweep (skip poles) |
-| asin / acos | 0.7025 | 0.0105 | 65536-pt; sqrt approx near boundary |
-| atan2 | 0.4953 | 0.0268 | 65536x5 radii; asin/acos+hypot_fast8 |
-| atan | 0.2985 | 0.0159 | 20001-pt sweep [-10,10]; via FR_atan2 |
-| sqrt | 0.0003 | 0.0000 | Round-to-nearest |
-| log2 | 0.2479 | 0.0045 | 65-entry mantissa table |
-| pow2 | 0.1373 | 0.0057 | 65-entry fraction table |
-| ln, log10 | 0.0015 | 0.0004 | Via FR_MULK28 from log2 |
-| exp | 0.0719 | 0.0051 | FR_MULK28 + FR_pow2 |
-| exp_fast | 0.0719 | 0.0064 | Shift-only scaling |
-| pow10 | 0.1163 | 0.0075 | FR_MULK28 + FR_pow2 |
-| pow10_fast | 0.1163 | 0.0100 | Shift-only scaling |
-| hypot (exact) | 0.0001 | 0.0000 | 64-bit intermediate |
-| hypot_fast8 (8-seg) | 0.0977 | 0.0508 | Shift-only, no multiply |
+
+| Function | Max err (%) | Avg err (%) | Note |
+|---|---:|---:|---|
+| sin/cos (BAM) | 0.1646 | 0.0058 | 65536 BAM; 129-entry quadrant table |
+| sin/cos (deg) | 0.5909 | 0.0091 | 65536-pt deg r7 + specials |
+| sin/cos (rad) | 0.1646 | 0.0059 | 65536-pt rad r16 |
+| tan (BAM) | 0.1704 | 0.0065 | 65536 BAM; 65-entry octant table |
+| tan (deg) | 0.6000 | 0.0140 | 65536-pt deg r7 + specials |
+| tan (rad) | 0.1704 | 0.0065 | 65536-pt rad r16 |
+| asin / acos | 1.9776 | 0.0308 | 65536-pt; sqrt approx near boundary |
+| atan2 | 0.4953 | 0.0238 | 65536x5 radii; asin/acos+hypot_fast8 |
+| atan | 0.2985 | 0.0153 | 20001-pt sweep [-10,10]; via FR_atan2 |
+| sqrt | 0.0003 | 0.0000 | Round-to-nearest |
+| log2 | 0.2479 | 0.0045 | 65-entry mantissa table |
+| pow2 | 0.1373 | 0.0057 | 65-entry fraction table |
+| ln, log10 | 0.0015 | 0.0004 | Via FR_MULK28 from log2 |
+| exp | 0.0719 | 0.0051 | FR_MULK28 + FR_pow2 |
+| exp_fast | 0.0719 | 0.0064 | Shift-only scaling |
+| pow10 | 0.1163 | 0.0075 | FR_MULK28 + FR_pow2 |
+| pow10_fast | 0.1163 | 0.0100 | Shift-only scaling |
+| hypot (exact) | 0.0001 | 0.0000 | 64-bit intermediate |
+| hypot_fast8 (8-seg) | 0.0977 | 0.0508 | Shift-only, no multiply |
## What's in the box
@@ -72,7 +76,7 @@ radix — Q16.16 is just the reference point for the table. See the
| Arithmetic | `FR_ADD`, `FR_SUB`, `FR_DIV`, `FR_DIV32`, `FR_MOD`, `FR_FixMuls`, `FR_FixMulSat`, `FR_CHRDX` |
| Utility | `FR_MIN`, `FR_MAX`, `FR_CLAMP`, `FR_ABS`, `FR_SGN` |
| Trig (integer deg) | `FR_Sin`, `FR_Cos`, `FR_Tan`, `FR_SinI`, `FR_CosI`, `FR_TanI` |
-| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_sin_deg`, `fr_cos_deg` |
+| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam`, `fr_sin_deg`, `fr_cos_deg` |
| Inverse trig | `FR_atan`, `FR_atan2`, `FR_asin`, `FR_acos` |
| Log / exp | `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`, `FR_EXP_FAST`, `FR_POW10_FAST`, `FR_MULK28` |
| Roots | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` |
@@ -118,7 +122,7 @@ pays off:
- **8- and 16-bit MCUs** (AVR, MSP430, 8051, sdcc) where the FPU does
not exist and even software float is too slow or too large.
-- **Hot inner loops on any CPU** where a parameterised-radix integer
+- **Hot inner loops on any CPU** where a parameterized-radix integer
multiply is faster and more deterministic than a `float`. Think DSP
taps, PID loops, coordinate transforms inside a scanline renderer.
- **Bit-exact reproducibility** across compilers, architectures, and
diff --git a/docs/api-reference.md b/docs/api-reference.md
index 3f97f20..1ec3742 100644
--- a/docs/api-reference.md
+++ b/docs/api-reference.md
@@ -1,7 +1,7 @@
# API Reference
Every public symbol, grouped by topic. Each entry lists the radix
-convention, the precision, and the error / saturation behaviour. All
+convention, the precision, and the error / saturation behavior. All
types are from `FR_defs.h`: `s8 s16 s32 s64` for
signed and `u8 u16 u32 u64` for unsigned integers (these are
aliases for the `<stdint.h>` types).
@@ -12,7 +12,7 @@ Most entries list **inputs**, **output**,
**radix handling** and **precision**
separately, because in a mixed-radix library those four things are
what actually lets you plan an arithmetic pipeline without hidden
-quantisation. If you are new to fixed-point, the
+quantization. If you are new to fixed-point, the
[Fixed-Point Primer](fixed-point-primer.md) explains the
notation first; come back here once you're comfortable reading
`s15.16` and `s0.15`.
@@ -118,7 +118,7 @@ so call sites read as intent:
| --- | --- | --- | --- |
| `I2FR(i, r)` | `i`: integer; `r`: target radix in bits | `s32` at radix `r` | `(i) << (r)`. No bounds check. Use when you know `|i|` fits in `32 − r` signed bits. |
| `FR2I(x, r)` | `x`: fixed-point at radix `r` | integer | `(x) >> (r)`. Truncates toward **−∞** (C's signed shift). `FR2I(-1, 4) == -1`, not 0. |
-| `FR_INT(x, r)` | `x`: fixed-point at radix `r` | integer | Truncates toward **zero**. `FR_INT(-1, 4) == 0`. Useful when you want C's normal integer-cast behaviour. |
+| `FR_INT(x, r)` | `x`: fixed-point at radix `r` | integer | Truncates toward **zero**. `FR_INT(-1, 4) == 0`. Useful when you want C's normal integer-cast behavior. |
| `FR_NUM(i, f, d, r)` | `i`: integer part; `f`: decimal fraction digits; `d`: number of digits in `f`; `r`: target radix | `s32` at radix `r` | Build a fixed-point literal from decimal. `FR_NUM(12, 34, 2, 10)` is 12.34 at s.10. Rounds toward zero; for round-to-nearest, add half an LSB at the call site. |
| `FR_numstr(s, r)` | `s`: null-terminated decimal string (e.g. `"3.14159"`); `r`: target radix | `s32` at radix `r` | Runtime string-to-fixed-point parser (inverse of `FR_printNumF`). Handles signs, leading whitespace, and leading-zero fractions like `"0.05"`. Up to 9 fractional digits. No malloc, no strtod, no libm. Returns 0 for NULL or empty input. |
| `FR2D(x, r)` | `x`: fixed-point at radix `r` | `double` | Debug-only: `x / (double)(1 << r)`. Pulls in `libm` — compile it out of release builds. |
@@ -157,13 +157,13 @@ so call sites read as intent:
| `FR_MAX(a, b)` | Two values of the same type | The larger of the two | Evaluates each argument once. |
| `FR_CLAMP(x, lo, hi)` | `x`: value; `lo`, `hi`: bounds | `x` clamped to `[lo, hi]` | Equivalent to `FR_MIN(FR_MAX(x, lo), hi)`. |
| `FR_DIV(x, xr, y, yr)` | `x`: numerator at radix `xr`; `y`: denominator at radix `yr` | `s32` at radix `xr` | Pre-scales the numerator in a 64-bit intermediate and **rounds to nearest** (adds half the divisor before truncating, with correct sign handling). Worst-case error ≤ 0.5 LSB. Works correctly across the full Q16.16 range. |
-| `FR_DIV_TRUNC(x, xr, y, yr)` | same as `FR_DIV` | `s32` at radix `xr` | `((s64)(x) << (yr)) / (s32)(y)`. Truncating division (rounds toward zero). This was the behaviour of `FR_DIV` in v2.0.0; use it when you need exact backward compatibility or when the truncation bias is acceptable. |
+| `FR_DIV_TRUNC(x, xr, y, yr)` | same as `FR_DIV` | `s32` at radix `xr` | `((s64)(x) << (yr)) / (s32)(y)`. Truncating division (rounds toward zero). This was the behavior of `FR_DIV` in v2.0.0; use it when you need exact backward compatibility or when the truncation bias is acceptable. |
| `FR_DIV32(x, xr, y, yr)` | same as `FR_DIV` | `s32` at radix `xr` | `((s32)(x) << (yr)) / (s32)(y)`. 32-bit-only truncating path — requires `|x| < 2^(31 − yr)` to avoid overflow in the intermediate shift. Use on tiny targets (PIC, AVR, 8051) where 64-bit ops pull in unwanted compiler runtime code. |
| `FR_MOD(x, y)` | `x`, `y`: same radix | remainder at the same radix | `(x) % (y)`. Standard C remainder semantics. |
## Arithmetic
-FR_Math splits arithmetic into three flavours. The
+FR_Math splits arithmetic into three flavors. The
**macros** (`FR_ADD`, `FR_SUB`)
are mixed-radix, inline, and wrap on overflow. The **s.16
helper functions** (`FR_FixMuls`,
@@ -346,7 +346,7 @@ Four shifts plus three adds — cheap on an 8051, AVR, or any
hand-written DSP inner loop — and the answer has at most
±0.5 LSB of truncation error. The same discipline applies to
the other direction: in `FR_DEG2BAM` the divide-by-360 is
-a compile-time constant, so any optimising compiler folds it into a
+a compile-time constant, so any optimizing compiler folds it into a
multiply-by-reciprocal (or, on a weaker toolchain, a runtime call
that you can inline yourself).
diff --git a/docs/building.md b/docs/building.md
index c9f5f21..da61b4f 100644
--- a/docs/building.md
+++ b/docs/building.md
@@ -101,7 +101,7 @@ binaries to keep compile times low:
| `test_log_exp` | Log base 2 / ln / log10 and their inverses. |
| `test_2d` | 2D transforms, determinants, inverses. |
| `test_full_coverage` | Dark-corner cases: overflow sentinels, edge radixes, round-trips. |
-| `test_tdd` | Characterisation tests pinned to bit-exact reference values. |
+| `test_tdd` | Characterization tests pinned to bit-exact reference values. |
As of v2.0.0 the suite contains **42 tests** across
those binaries and covers **99%** of the library source.
@@ -119,12 +119,12 @@ make test
### Running the TDD pins after a change
-`test_tdd.cpp` is a characterisation suite. It records
+`test_tdd.cpp` is a characterization suite. It records
exact bit patterns for a sample of inputs and fails loudly if those
-patterns drift. Any change that modifies the numerical behaviour of
+patterns drift. Any change that modifies the numerical behavior of
the library will break this suite — that's the point.
-If you *intended* to change the numerical behaviour (e.g.
+If you *intended* to change the numerical behavior (e.g.
you improved a polynomial approximation), update the pinned values in
`tests/test_tdd.cpp` and note the change in
`release_notes.md` along with any updates to the
diff --git a/docs/examples.md b/docs/examples.md
index b7d6145..1716efd 100644
--- a/docs/examples.md
+++ b/docs/examples.md
@@ -425,7 +425,7 @@ The `XFormPtI16` fast path takes `s16`
coordinates in and writes `s16` out. It's a tiny
bit lossier than the `s32` form, but it sidesteps all
the fixed-point conversion on the hot path — useful inside
-the inner loop of a scanline rasteriser where you already know
+the inner loop of a scanline rasterizer where you already know
your coordinates fit in 16 bits.
*Caveats:* the output is narrowed to `s16`,
@@ -511,7 +511,7 @@ int main(void)
FR_printNumF(buf_putc, val, 16, 0, 8);
printf(" 16 16 0x%08x %s\n", (unsigned)val, buf);
/* Expected: "3.14158630" — good through 5 digits, then
- * quantisation noise appears. This is the sweet spot for
+ * quantization noise appears. This is the sweet spot for
* most embedded work: 16 bits of fraction fits in an s32
* with 15 bits of integer range (±32767). */
}
@@ -562,7 +562,7 @@ at radix 8 the value is `0x324` — only 10 significant bits —
so the decimal rendering can only faithfully reproduce about two
fractional digits. At radix 24 the value is `0x03243F6A` — 26
significant bits — and seven decimal digits survive. The
-eighth digit (`5` vs `4`) shows the quantisation floor: `2^−24 ≈
+eighth digit (`5` vs `4`) shows the quantization floor: `2^−24 ≈
6 × 10^−8`, so the last digit is always uncertain.
## See also
diff --git a/docs/fixed-point-primer.md b/docs/fixed-point-primer.md
index 4c127ad..ab79007 100644
--- a/docs/fixed-point-primer.md
+++ b/docs/fixed-point-primer.md
@@ -266,14 +266,14 @@ you want to think of an FR_Math value as a "number with a
radix", think of the radix as a *type annotation that lives
in your source code*, not a runtime field.
-## Quantisation and loss of precision
+## Quantization and loss of precision
Fixing the radix also fixes the smallest representable fractional
step. At radix *N*, that step is `2^−N` — nothing finer survives
the round-trip into the integer. Any real value smaller than the
step rounds to zero; any real value landing between two adjacent
steps rounds to one of them. The difference between the ideal
-value and its stored form is called **quantisation error**, and it
+value and its stored form is called **quantization error**, and it
is the main price paid for doing fractional math in integer
registers.
@@ -295,7 +295,7 @@ radix 16 and the picture changes:
error = 0.00000153 (< 0.002 %)
```
-This behaviour isn't a bug — it is the same compromise IEEE-754
+This behavior isn't a bug — it is the same compromise IEEE-754
floating point makes with its mantissa. The difference is that a
float hides the trade-off behind a variable exponent, while
fixed-point puts it on a ledger that the programmer chooses up
@@ -307,7 +307,7 @@ half the smallest step the application cares about. Any coarser
and small signals vanish; any finer and integer headroom is being
spent for no benefit.
-A second consequence worth recording: quantisation error
+A second consequence worth recording: quantization error
*accumulates*. Summing a million low-radix values sums the errors
too. Signal-processing pipelines with long feedback paths are the
main reason to carry accumulators at a wider radix than the
@@ -375,7 +375,7 @@ FR_Math ships this operation as
callback `f`, which makes it usable on targets without stdio — a
UART write, an LCD glyph pusher, a ring-buffer append. The `pad`
parameter sets a minimum field width and `prec` sets the number of
-fractional digits. Rounding behaviour matches the hand-rolled
+fractional digits. Rounding behavior matches the hand-rolled
version: excess fractional digits are truncated, and negative
values are handled without the two's-complement trap described
above.
@@ -384,7 +384,7 @@ above.
Once you've chosen a radix, the everyday operations behave
almost like integer math — with one or two twists per
-operation that you just have to internalise. Let's walk
+operation that you just have to internalize. Let's walk
through them.
### Addition and subtraction
@@ -527,7 +527,7 @@ Three things to watch for:
it explicitly before the divide.
- **Rounding toward zero.** C's integer division truncates toward
zero for both signs, so `−7 / 2 == −3` (not `−4`). Fixed-point
- division inherits that behaviour. Round-to-nearest can be
+ division inherits that behavior. Round-to-nearest can be
layered on top by adding `b / 2` (for a positive numerator) or
`−b / 2` (for a negative numerator) to the pre-scaled numerator
before the divide.
@@ -557,7 +557,7 @@ for you:
- Going to a *smaller* radix — the low bits are
dropped. Precision is lost; headroom grows. This is a good
place to add `± (1 << (from_r - to_r - 1))`
- before the shift if you want round-to-nearest behaviour.
+ before the shift if you want round-to-nearest behavior.
The value is conserved as closely as the destination radix can
represent it. Nothing more, nothing less.
@@ -620,7 +620,7 @@ and store the result back into a 32-bit register without thinking
about it, you will eventually pass a pair of inputs whose product
doesn't fit, and plain C will hand you wrap-around garbage
with no warning. A signed 32-bit multiply that overflows is not a
-runtime error in C — it's undefined behaviour that
+runtime error in C — it's undefined behavior that
happens to look like data most of the time.
FR_Math defends against this in three layers, and it's
@@ -711,12 +711,12 @@ you actually need 15 integer bits on that particular signal.
## A worked example: one-pole IIR low-pass filter
The sections up to this point have introduced the pieces
-individually: scaling, notation, quantisation, arithmetic,
+individually: scaling, notation, quantization, arithmetic,
overflow, and radix choice. A small end-to-end example is the
fastest way to see how those pieces fit together on a real
pipeline. The filter walked through below is a single-pole
infinite-impulse-response (IIR) low-pass — about the simplest
-entry in the DSP catalogue, but realistic enough to exercise
+entry in the DSP catalog, but realistic enough to exercise
nearly every decision the primer has covered so far.
In floating point, the filter is one line of arithmetic:
@@ -753,7 +753,7 @@ be picked:
`x`, so it shares the same ±32767 output range. But because it
accumulates small updates on every sample, it will drift and
lose precision unless carried at a higher radix than the raw
- input. This is the quantisation-error accumulation noted
+ input. This is the quantization-error accumulation noted
earlier in the primer, showing up in practice.
### Step 2: pick the radixes
@@ -841,7 +841,7 @@ feeds both versions a few thousand samples — a mix of sine tones,
step inputs, and silence is enough to exercise the relevant paths
— and reports the worst-case delta. For a radix-15 one-pole IIR
the expected worst-case difference is on the order of a few LSB,
-comparable to the inherent quantisation of the 16-bit output
+comparable to the inherent quantization of the 16-bit output
format and not audible in normal listening. Anything substantially
larger indicates a radix choice that is too tight, a rounding
mode that is drifting, or a missing int64 promotion on the
@@ -917,12 +917,12 @@ Angles deserve their own section because FR_Math gives you
angle into?** Because the `u16` wraparound *is* the angular
modulus — that's the whole feature. Adding two `u16` BAM values
automatically gives you the right answer modulo a full revolution,
-with zero quantisation error at the boundary and no `% 65536` in
+with zero quantization error at the boundary and no `% 65536` in
sight. If BAM were `s32`, every read of the table would have to
explicitly mask off the top bits (and handle negative values)
before the quadrant extraction (`bam >> 14`) made any sense. You
would have traded one free operation for two slow ones on every
-sample, just to get back the same behaviour. So instead, the public
+sample, just to get back the same behavior. So instead, the public
trig entry points (`FR_CosI`, `FR_Cos`, `fr_cos`, and friends)
*all* take signed angles — in degrees, fixed-radix degrees, or
radians — and only the internal `fr_cos_bam` / `fr_sin_bam`
diff --git a/docs/getting-started.md b/docs/getting-started.md
index 54b7f17..9eac417 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -284,7 +284,7 @@ coverage across the library sources.
conventions work.
- **[API Reference](api-reference.md)**
— per-symbol inputs, outputs, precision, and saturation
- behaviour.
+ behavior.
- **[Examples](examples.md)** —
runnable snippets for common tasks.
- **[Building & Testing](building.md)**
diff --git a/docs/releases.md b/docs/releases.md
index 277b811..6655693 100644
--- a/docs/releases.md
+++ b/docs/releases.md
@@ -4,6 +4,17 @@ Release highlights. For the full per-symbol change log, see
[release_notes.md](https://github.com/deftio/fr_math/blob/master/release_notes.md)
in the repo.
+## v2.0.8 — 2026
+
+Tangent accuracy rewrite and trig rounding fix.
+
+- **BAM-native tangent**: new `fr_tan_bam(u16 bam)` with 65-entry octant table (130 bytes). No 64-bit math. `FR_TanI`, `FR_Tan`, `fr_tan` are now thin wrappers.
+- **Round-to-nearest fix**: radian/degree trig wrappers now round instead of truncating when converting to BAM. Peak error drops from ~1.03% to 0.16% on the radian path, matching BAM-native accuracy.
+- **Conversion macro trimming**: `FR_DEG2BAM` trimmed to ~18 bits (from ~28) and `FR_RAD2BAM` to ~21 bits (from ~27). Verified: sin/cos peak error changes by <0.04%.
+- **`FR_TRIG_MINVAL` fixed**: now `-FR_TRIG_MAXVAL` (was `-FR_TRIG_MASK`).
+
+---
+
## v2.0.7 — 2026
README restructure, accuracy table cleanup, expanded cross-compile support.
@@ -124,7 +135,7 @@ with v2.0.0 except where noted.
- `FR_DIV(x, xr, y, yr)` — fixed-point division with 64-bit
pre-scaling. Now **rounds to nearest** (≤ 0.5 LSB error)
instead of truncating. `FR_DIV_TRUNC` preserves the old
- truncating behaviour for backward compatibility. `FR_DIV32` is
+ truncating behavior for backward compatibility. `FR_DIV32` is
the 32-bit-only truncating path.
- `FR_MOD(x, xr, y, yr)` — fixed-point modulus.
@@ -149,7 +160,7 @@ with v2.0.0 except where noted.
| FR_atan signature | `(input, radix)` → s16 degrees | `(input, radix, out_radix)` → s32 radians |
| FR_atan2 signature | `(y, x)` → s16 degrees | `(y, x, out_radix)` → s32 radians |
| FR_BAM2RAD | off by 1024× (bug) | correct |
-| FR_DIV rounding | truncates toward zero | rounds to nearest (use `FR_DIV_TRUNC` for old behaviour) |
+| FR_DIV rounding | truncates toward zero | rounds to nearest (use `FR_DIV_TRUNC` for old behavior) |
---
@@ -187,7 +198,7 @@ mandatory.
dropped.
- `FR_atan`, `FR_Tan`, `FR_TanI`:
wiring and overflow fixes.
-- `FR_printNumD/F/H`: fixed undefined behaviour on
+- `FR_printNumD/F/H`: fixed undefined behavior on
`INT_MIN` and a broken fraction extraction in the
v1 code.
- `FR_DEG2RAD` / `FR_RAD2DEG`: macro bodies
@@ -205,7 +216,7 @@ mandatory.
`FR_BAM2DEG`, `FR_RAD2BAM`,
`FR_BAM2RAD`. BAM (16 bits per full circle) is the
natural integer representation for phase accumulators and
- gives zero quantisation at the wraparound.
+ gives zero quantization at the wraparound.
- **Square root and hypot**: `FR_sqrt`
uses a digit-by-digit integer isqrt on `int64_t`;
`FR_hypot` computes `sqrt(x² + y²)`
@@ -260,8 +271,8 @@ mandatory.
### Test suite
v2 ships with **42 tests** across six test binaries
-and a characterisation suite (`test_tdd.cpp`) that pins
-numerical behaviour to bit-exact reference values. Overall line
+and a characterization suite (`test_tdd.cpp`) that pins
+numerical behavior to bit-exact reference values. Overall line
coverage is **99%** on the library sources.
## v1.0.3 — 2025
diff --git a/llms.txt b/llms.txt
index a6d254f..7c0ce3e 100644
--- a/llms.txt
+++ b/llms.txt
@@ -27,7 +27,7 @@ Common radix choices:
- `src/FR_math.h` — all public declarations, macros, constants
- `src/FR_math.c` — all function implementations (~42KB)
- `src/FR_defs.h` — type aliases: s8, s16, s32, u8, u16, u32
-- `src/FR_trig_table.h` — precomputed sine table (256 entries)
+- `src/FR_trig_table.h` — precomputed cosine quadrant table (129 entries) + tangent octant table (65 entries)
- `src/FR_math_2D.h` / `src/FR_math_2D.cpp` — optional 2D transform class (C++)
## Types
@@ -82,6 +82,7 @@ s32 fr_tan(s32 rad, u16 radix);
// BAM (Binary Angle Measurement) — u16 where 65536 = 360 degrees:
s32 fr_cos_bam(u16 bam);
s32 fr_sin_bam(u16 bam);
+s32 fr_tan_bam(u16 bam);
// Degree API at any radix:
s32 FR_Cos(s16 deg, u16 radix);
diff --git a/pages/guide/api-reference.html b/pages/guide/api-reference.html
index 5c03c66..328d2f4 100644
--- a/pages/guide/api-reference.html
+++ b/pages/guide/api-reference.html
@@ -5,7 +5,7 @@
API Reference — FR_Math
-
+
@@ -18,7 +18,7 @@
API Reference
Every public symbol, grouped by topic. Each entry lists the radix
-convention, the precision, and the error / saturation behaviour. All
+convention, the precision, and the error / saturation behavior. All
types are from FR_defs.h: s8 s16 s32 s64 for
signed and u8 u16 u32 u64 for unsigned integers (these are
aliases for the <stdint.h> types).
@@ -29,7 +29,7 @@
Reading this reference
radix handling and precision
separately, because in a mixed-radix library those four things are
what actually lets you plan an arithmetic pipeline without hidden
-quantisation. If you are new to fixed-point, the
+quantization. If you are new to fixed-point, the
Fixed-Point Primer explains the
notation first; come back here once you’re comfortable reading
s15.16 and s0.15.
@@ -162,7 +162,7 @@
Integer ↔ fixed-point
FR_INT(x, r)
x: fixed-point at radix r
integer
-
Truncates toward zero. FR_INT(-1, 4) == 0. Useful when you want C’s normal integer-cast behaviour.
+
Truncates toward zero. FR_INT(-1, 4) == 0. Useful when you want C’s normal integer-cast behavior.
FR_NUM(i, f, d, r)
@@ -316,7 +316,7 @@
Utility macros
FR_DIV_TRUNC(x, xr, y, yr)
x: numerator at radix xr; y: denominator at radix yr
((s64)(x) << (yr)) / (s32)(y)
-
Truncating division (rounds toward zero). This was the behaviour of FR_DIV in v2.0.0; use it when you need exact backward compatibility or when the truncation bias is acceptable.
+
Truncating division (rounds toward zero). This was the behavior of FR_DIV in v2.0.0; use it when you need exact backward compatibility or when the truncation bias is acceptable.
FR_DIV32(x, xr, y, yr)
@@ -335,7 +335,7 @@
Utility macros
Arithmetic
-
FR_Math splits arithmetic into three flavours. The
+
FR_Math splits arithmetic into three flavors. The
macros (FR_ADD, FR_SUB)
are mixed-radix, inline, and wrap on overflow. The s.16
helper functions (FR_FixMuls,
@@ -564,7 +564,7 @@
Worked example: keeping precision on chips without a multiplier
hand-written DSP inner loop — and the answer has at most
±0.5 LSB of truncation error. The same discipline applies to
the other direction: in FR_DEG2BAM the divide-by-360 is
-a compile-time constant, so any optimising compiler folds it into a
+a compile-time constant, so any optimizing compiler folds it into a
multiply-by-reciprocal (or, on a weaker toolchain, a runtime call
that you can inline yourself).
diff --git a/pages/guide/building.html b/pages/guide/building.html
index 10b3739..246afbb 100644
--- a/pages/guide/building.html
+++ b/pages/guide/building.html
@@ -119,7 +119,7 @@
Characterisation tests pinned to bit-exact reference values.
+
test_tdd
Characterization tests pinned to bit-exact reference values.
@@ -137,12 +137,12 @@
Running a single binary
Running the TDD pins after a change
-
test_tdd.cpp is a characterisation suite. It records
+
test_tdd.cpp is a characterization suite. It records
exact bit patterns for a sample of inputs and fails loudly if those
-patterns drift. Any change that modifies the numerical behaviour of
+patterns drift. Any change that modifies the numerical behavior of
the library will break this suite — that’s the point.
-
If you intended to change the numerical behaviour (e.g.
+
If you intended to change the numerical behavior (e.g.
you improved a polynomial approximation), update the pinned values in
tests/test_tdd.cpp and note the change in
release_notes.md along with any updates to the
diff --git a/pages/guide/examples.html b/pages/guide/examples.html
index 137525b..fec3f91 100644
--- a/pages/guide/examples.html
+++ b/pages/guide/examples.html
@@ -423,7 +423,7 @@
10. Integer-only 2D transform for scanline renderers
coordinates in and writes s16 out. It’s a tiny
bit lossier than the s32 form, but it sidesteps all
the fixed-point conversion on the hot path — useful inside
-the inner loop of a scanline rasteriser where you already know
+the inner loop of a scanline rasterizer where you already know
your coordinates fit in 16 bits.
Caveats: the output is narrowed to s16,
@@ -510,7 +510,7 @@
11. String round-trip and radix precision
FR_printNumF(buf_putc, val, 16, 0, 8);
printf(" 16 16 0x%08x %s\n", (unsigned)val, buf);
/* Expected: "3.14158630" — good through 5 digits, then
- * quantisation noise appears. This is the sweet spot for
+ * quantization noise appears. This is the sweet spot for
* most embedded work: 16 bits of fraction fits in an s32
* with 15 bits of integer range (±32767). */
}
@@ -558,7 +558,7 @@
11. String round-trip and radix precision
so the decimal rendering can only faithfully reproduce about two
fractional digits. At radix 24 the value is 0x03243F6A — 26
significant bits — and seven decimal digits survive. The
-eighth digit (5 vs 4) shows the quantisation floor:
+eighth digit (5 vs 4) shows the quantization floor:
2^−24 ≈ 6 × 10^−8, so the last digit is always
uncertain.
diff --git a/pages/guide/fixed-point-primer.html b/pages/guide/fixed-point-primer.html
index a4e71c4..ce41074 100644
--- a/pages/guide/fixed-point-primer.html
+++ b/pages/guide/fixed-point-primer.html
@@ -294,7 +294,7 @@
Notation: sM.N and the radix
radix”, think of the radix as a type annotation that lives
in your source code, not a runtime field.
-
Quantisation and loss of precision
+
Quantization and loss of precision
Fixing the radix also fixes the smallest representable
fractional step. At radix N, that step is
@@ -302,7 +302,7 @@
Quantisation and loss of precision
the round-trip into the integer. Any real value smaller than the
step rounds to zero; any real value landing between two adjacent
steps rounds to one of them. The difference between the ideal
-value and its stored form is called quantisation
+value and its stored form is called quantization
error, and it is the main price paid for doing
fractional math in integer registers.
@@ -323,7 +323,7 @@
Quantisation and loss of precision
error = 0.00000153 (< 0.002 %)
-
This behaviour isn’t a bug — it is the same
+
This behavior isn’t a bug — it is the same
compromise IEEE-754 floating point makes with its mantissa. The
difference is that a float hides the trade-off behind a variable
exponent, while fixed-point puts it on a ledger that the
@@ -336,7 +336,7 @@
Quantisation and loss of precision
vanish; any finer and integer headroom is being spent for no
benefit.
-
A second consequence worth recording: quantisation error
+
A second consequence worth recording: quantization error
accumulates. Summing a million low-radix values sums
the errors too. Signal-processing pipelines with long feedback
paths are the main reason to carry accumulators at a wider radix
@@ -406,7 +406,7 @@
Displaying a fixed-point value
usable on targets without stdio — a UART write, an LCD
glyph pusher, a ring-buffer append. The pad
parameter sets a minimum field width and prec sets
-the number of fractional digits. Rounding behaviour matches the
+the number of fractional digits. Rounding behavior matches the
hand-rolled version: excess fractional digits are truncated, and
negative values are handled without the two’s-complement
trap described above.
@@ -415,7 +415,7 @@
Arithmetic: what the operations actually do
Once you’ve chosen a radix, the everyday operations behave
almost like integer math — with one or two twists per
-operation that you just have to internalise. Let’s walk
+operation that you just have to internalize. Let’s walk
through them.
Addition and subtraction
@@ -489,7 +489,7 @@
Multiplication
doesn’t fire. Rounds to nearest —
adds 0.5 LSB before the shift.
FR_FixMulSat(a, b, r) — same shape with
- the same round-to-nearest behaviour, but also saturates to
+ the same round-to-nearest behavior, but also saturates to
FR_OVERFLOW_POS /
FR_OVERFLOW_NEG if the result wouldn’t
fit. Prefer this one by default unless you’ve proven
@@ -555,7 +555,7 @@
Division
division truncates toward zero for both signs, so
−7 / 2 == −3 (not
−4). Fixed-point division inherits
- that behaviour. Round-to-nearest can be layered on top by
+ that behavior. Round-to-nearest can be layered on top by
adding b / 2 (for a positive numerator) or
−b / 2 (for a negative numerator) to
the pre-scaled numerator before the divide.
@@ -581,7 +581,7 @@
Changing radix
Going to a smaller radix — the low bits are
dropped. Precision is lost; headroom grows. This is a good
place to add ± (1 << (from_r - to_r - 1))
- before the shift if you want round-to-nearest behaviour.
+ before the shift if you want round-to-nearest behavior.
The value is conserved as closely as the destination radix can
@@ -644,7 +644,7 @@
Overflow, saturation, and the sentinels
about it, you will eventually pass a pair of inputs whose product
doesn’t fit, and plain C will hand you wrap-around garbage
with no warning. A signed 32-bit multiply that overflows is not a
-runtime error in C — it’s undefined behaviour that
+runtime error in C — it’s undefined behavior that
happens to look like data most of the time.
FR_Math defends against this in three layers, and it’s
@@ -743,12 +743,12 @@
Choosing a radix
A worked example: one-pole IIR low-pass filter
The sections up to this point have introduced the pieces
-individually: scaling, notation, quantisation, arithmetic,
+individually: scaling, notation, quantization, arithmetic,
overflow, and radix choice. A small end-to-end example is the
fastest way to see how those pieces fit together on a real
pipeline. The filter walked through below is a single-pole
infinite-impulse-response (IIR) low-pass — about the
-simplest entry in the DSP catalogue, but realistic enough to
+simplest entry in the DSP catalog, but realistic enough to
exercise nearly every decision the primer has covered so far.
In floating point, the filter is one line of arithmetic:
@@ -790,7 +790,7 @@
Step 1: inventory the ranges
±32767 output range. But because it accumulates
small updates on every sample, it will drift and lose
precision unless carried at a higher radix than the raw
- input. This is the quantisation-error accumulation noted
+ input. This is the quantization-error accumulation noted
earlier in the primer, showing up in practice.
@@ -889,7 +889,7 @@
Step 5: test against the reference
exercise the relevant paths — and reports the worst-case
delta. For a radix-15 one-pole IIR the expected worst-case
difference is on the order of a few LSB, comparable to the
-inherent quantisation of the 16-bit output format and not
+inherent quantization of the 16-bit output format and not
audible in normal listening. Anything substantially larger
indicates a radix choice that is too tight, a rounding mode
that is drifting, or a missing int64 promotion on the
@@ -973,13 +973,13 @@
Angle representations
u16 wraparound is the angular modulus —
that’s the whole feature. Adding two u16 BAM
values automatically gives you the right answer modulo a full
-revolution, with zero quantisation error at the boundary and no
+revolution, with zero quantization error at the boundary and no
% 65536 in sight. If BAM were s32, every
read of the table would have to explicitly mask off the top bits
(and handle negative values) before the quadrant extraction
(bam >> 14) made any sense. You would have traded
one free operation for two slow ones on every sample, just to get
-back the same behaviour. So instead, the public trig entry points
+back the same behavior. So instead, the public trig entry points
(FR_CosI, FR_Cos, fr_cos, and
friends) all take signed angles — in degrees,
fixed-radix degrees, or radians — and only the internal
diff --git a/pages/guide/getting-started.html b/pages/guide/getting-started.html
index b6a22ed..ee8f5df 100644
--- a/pages/guide/getting-started.html
+++ b/pages/guide/getting-started.html
@@ -297,7 +297,7 @@
Next steps
conventions work.
API Reference
— per-symbol inputs, outputs, precision, and saturation
- behaviour.
FPU does not exist and even software float is too slow or too
large.
Hot inner loops on any CPU where a
- parameterised-radix integer multiply is faster and more
+ parameterized-radix integer multiply is faster and more
deterministic than a float. Think DSP taps, PID
loops, coordinate transforms inside a scanline renderer.
Bit-exact reproducibility across compilers,
diff --git a/pages/releases.html b/pages/releases.html
index 337035a..e5ec364 100644
--- a/pages/releases.html
+++ b/pages/releases.html
@@ -159,7 +159,7 @@
New utility macros
FR_DIV(x, xr, y, yr) — fixed-point division with
64-bit pre-scaling. Now rounds to nearest
(≤ 0.5 LSB error) instead of truncating.
- FR_DIV_TRUNC preserves the old truncating behaviour
+ FR_DIV_TRUNC preserves the old truncating behavior
for backward compatibility. FR_DIV32 is the 32-bit-only
truncating path.
FR_MOD(x, xr, y, yr) — fixed-point modulus.
@@ -190,7 +190,7 @@
Breaking changes from v2.0.0
FR_atan signature
(input, radix) → s16 degrees
(input, radix, out_radix) → s32 radians
FR_atan2 signature
(y, x) → s16 degrees
(y, x, out_radix) → s32 radians
FR_BAM2RAD
off by 1024× (bug)
correct
-
FR_DIV rounding
truncates toward zero
rounds to nearest (use FR_DIV_TRUNC for old behaviour)
+
FR_DIV rounding
truncates toward zero
rounds to nearest (use FR_DIV_TRUNC for old behavior)
@@ -231,7 +231,7 @@
Numerical fixes
dropped.
FR_atan, FR_Tan, FR_TanI:
wiring and overflow fixes.
-
FR_printNumD/F/H: fixed undefined behaviour on
+
FR_printNumD/F/H: fixed undefined behavior on
INT_MIN and a broken fraction extraction in the
v1 code.
FR_BAM2DEG, FR_RAD2BAM,
FR_BAM2RAD. BAM (16 bits per full circle) is the
natural integer representation for phase accumulators and
- gives zero quantisation at the wraparound.
+ gives zero quantization at the wraparound.
Square root and hypot: FR_sqrt
uses a digit-by-digit integer isqrt on int64_t;
FR_hypot computes sqrt(x² + y²)
@@ -313,8 +313,8 @@
Breaking changes
Test suite
v2 ships with 42 tests across six test binaries
-and a characterisation suite (test_tdd.cpp) that pins
-numerical behaviour to bit-exact reference values. Overall line
+and a characterization suite (test_tdd.cpp) that pins
+numerical behavior to bit-exact reference values. Overall line
coverage is 99% on the library sources.
v1.0.3 — 2025
diff --git a/release_notes.md b/release_notes.md
index 8a5f3bb..f5e1ef0 100644
--- a/release_notes.md
+++ b/release_notes.md
@@ -1,5 +1,51 @@
# FR_Math Release Notes
+## Version 2.0.8 (2026)
+
+Tangent accuracy rewrite and trig rounding fix.
+
+### BAM-native tangent table
+
+- **New `fr_tan_bam(u16 bam)`** function with a dedicated 65-entry octant
+ lookup table (`gFR_TAN_TAB_O` in `FR_trig_table.h`, 130 bytes ROM).
+ First octant uses direct table + lerp; second octant uses the
+ reciprocal identity `tan(x) = 1/tan(90° − x)` with one 32-bit division.
+ No 64-bit intermediates anywhere in the tan path.
+- **`FR_TanI`, `FR_Tan`, `fr_tan`** are now thin wrappers over
+ `fr_tan_bam`. The old sin/cos division implementation is removed.
+- Peak error: 0.17% (BAM input), 0.60% (degrees at radix 7), 0.17% (radians at radix 16).
+
+### Round-to-nearest fix for radian/degree wrappers
+
+- `fr_cos`, `fr_sin`, `fr_tan`, `FR_Cos`, `FR_Sin`, `FR_Tan` now add
+ 0.5 LSB (`1 << (radix-1)`) before the `>> radix` shift when converting
+ from radians/degrees to BAM. This rounds to the nearest BAM value
+ instead of truncating, eliminating a systematic 1-BAM rounding error
+ that caused ~1% peak error near zero crossings.
+- Radian-path sin/cos/tan now match BAM-native accuracy (0.16-0.17%
+ peak, was ~1.03%).
+
+### Conversion macro trimming
+
+- `FR_DEG2BAM`: 10 terms (~28 bits) reduced to 7 terms (~18 bits)
+- `FR_RAD2BAM`: 9 terms (~27 bits) reduced to 7 terms (~21 bits)
+- `FR_DEG2RAD`: 3 terms (~13 bits) extended to 5 terms (~17 bits)
+- 18 bits of precision gives 4 bits of headroom over the 14-bit
+ effective BAM resolution of the trig tables. Verified: reverting to
+ the old full-precision macros changes sin/cos peak error by <0.04%.
+
+### Other
+
+- `FR_TRIG_MINVAL` fixed: was `-FR_TRIG_MASK` (-65535), now
+ `-FR_TRIG_MAXVAL` (-2147483647) to properly pair with `FR_TRIG_MAXVAL`
+ for tan saturation clamping.
+- Accuracy table in all docs now shows separate BAM/deg/rad rows for
+ sin/cos and tan, matching the TDD characterization report.
+- `fr_tan_bam` added to function listings across README, docs, HTML
+ pages, and llms.txt.
+
+---
+
## Version 2.0.7 (2026)
README restructure, accuracy table cleanup, and expanded cross-compile support.
diff --git a/src/FR_math.c b/src/FR_math.c
index 181972e..dce131b 100644
--- a/src/FR_math.c
+++ b/src/FR_math.c
@@ -86,8 +86,16 @@ s32 fr_cos_bam(u16 bam)
d = lo - hi; /* >= 0: cos monotonic */
v = lo - (((d * (s32)frac) + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS);
- /* Shift s0.15 → s15.16 */
- v <<= 1;
+ if (v < 0x40) {
+ /* Near zero crossing: redo interpolation with 3 extra bits of
+ * precision to reduce rounding error when the result is small. */
+ s32 lo3 = (s32)gFR_COS_TAB_Q[idx] << 3;
+ s32 d3 = lo3 - ((s32)gFR_COS_TAB_Q[idx + 1] << 3);
+ v = lo3 - (((d3 * (s32)frac) + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS);
+ v = (v + 2) >> 2; /* s0.18 → s15.16 with rounding */
+ } else {
+ v <<= 1; /* s0.15 → s15.16 */
+ }
return (q == 1 || q == 2) ? -v : v;
}
@@ -100,19 +108,6 @@ s32 fr_sin_bam(u16 bam)
return fr_cos_bam((u16)(bam - FR_BAM_QUADRANT));
}
-/* Convert radians at given radix to BAM with rounding.
- * One radian = 65536 / (2*pi) ≈ 10430.378 BAM units.
- * We use the more precise scaled constant 10430378 / 1000 to keep error
- * bounded across a wide range of radians.
- */
-static u16 fr_rad_to_bam(s32 rad, u16 radix)
-{
- int64_t scaled = ((int64_t)rad * 10430378LL) / 1000;
- if (radix > 0)
- scaled >>= radix;
- return (u16)((u32)scaled & 0xffff);
-}
-
s32 fr_cos(s32 rad, u16 radix)
{
return fr_cos_bam(fr_rad_to_bam(rad, radix));
@@ -123,70 +118,131 @@ s32 fr_sin(s32 rad, u16 radix)
return fr_sin_bam(fr_rad_to_bam(rad, radix));
}
-/* fr_tan: returns sin/cos at s15.16 (radix 16). Saturates if cos is near zero. */
-s32 fr_tan(s32 rad, u16 radix)
-{
- u16 bam = fr_rad_to_bam(rad, radix);
- s32 s = fr_sin_bam(bam);
- s32 c = fr_cos_bam(bam);
- if (c == 0)
- return (s >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
- return (s32)(((int64_t)s << FR_TRIG_OUT_PREC) / c);
-}
-
/*=======================================================
- * Integer-degree and fixed-radix-degree trig wrappers
+ * BAM-native tangent: fr_tan_bam
*
- * FR_CosI / FR_SinI are macros in the header (zero cost). The fixed-radix
- * variants here convert s.r degrees to BAM in one shot using a precomputed
- * reciprocal of 360 to avoid division on multiply-poor cores like 8051.
+ * Uses a 65-entry octant table (gFR_TAN_TAB_O) for the first octant
+ * [0, 45°] and the reciprocal identity tan(x) = 1/tan(90°-x) for the
+ * second octant (45°, 90°). Result is s15.16 with saturation at the
+ * poles.
*
- * Math: bam = deg * (65536 / 360) = deg * 182.0444...
- * In s.16 fixed point: 65536 / 360 = 0xB60B (rounded). So
- * bam_u16 = (deg_s.r * 0xB60B) >> r
- * gives bam in u16 BAM units. The constant 0xB60B contains the divide by
- * 360 baked in; the shift `>> r` strips the input radix.
+ * No 64-bit intermediates. One 32-bit division only in the >45° path.
*/
-static u16 fr_deg_radix_to_bam(s16 deg, u16 radix)
+s32 fr_tan_bam(u16 bam)
{
- /* 0xB60B ≈ (65536/360) * 256 — the ×256 prescale keeps 32-bit math
- * friendly to 8051-class MCUs. We must shift out both the input
- * fraction bits (radix) AND the 8-bit prescale, hence radix + 8.
- * The +half term rounds to nearest, matching FR_DEG2BAM behaviour.
- */
- s32 v = (s32)deg * 0xB60BL;
- u16 shift = radix + 8;
- return (u16)((u32)((v + (1L << (shift - 1))) >> shift) & 0xffff);
+ u32 q = ((u32)bam >> 14) & 0x3; /* quadrant (top 2 bits) */
+ u32 inq = (u32)bam & 0x3FFFu; /* in-quadrant (14 bits) */
+ s32 sign = 1;
+ u32 idx, frac;
+ s32 lo, hi, raw;
+
+ /* Exact zeros: bam lands exactly on 0° or 180° */
+ if (inq == 0 && (q == 0 || q == 2))
+ return 0;
+
+ /* Poles: bam lands exactly on 90° or 270° */
+ if (inq == 0 && (q == 1 || q == 3))
+ return (q == 1) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+
+ /* Q1 (90°..180°) and Q3 (270°..360°): reflect and negate */
+ if (q == 1 || q == 3) {
+ inq = 0x4000u - inq;
+ sign = -1;
+ }
+
+ /* Now inq is in (0, 0x4000) = (0°, 90°) exclusive.
+ * Split into first octant [0, 45°) and second octant [45°, 90°). */
+ if (inq < FR_TAN_OCTANT) {
+ /* First octant: direct table lookup + lerp.
+ * inq is 13 bits; top FR_TAN_TABLE_BITS index the table,
+ * bottom FR_TAN_FRAC_BITS drive interpolation. */
+ idx = inq >> FR_TAN_FRAC_BITS;
+ frac = inq & FR_TAN_FRAC_MASK;
+ lo = (s32)gFR_TAN_TAB_O[idx];
+ hi = (s32)gFR_TAN_TAB_O[idx + 1];
+ raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+ /* raw is in u0.15. Shift to s15.16. */
+ raw <<= 1;
+ } else {
+ /* Second octant: tan(x) = 1 / tan(90° - x).
+ * complement is in (0, 0x2000] = (0°, 45°]. */
+ u32 comp = 0x4000u - inq;
+
+        /* Look up tan(complement). At exactly 45° idx is the last table entry and frac == 0, so idx + 1 (one past the end) must not be read. */
+        idx = comp >> FR_TAN_FRAC_BITS;
+        frac = comp & FR_TAN_FRAC_MASK;
+        lo = (s32)gFR_TAN_TAB_O[idx];
+        hi = (frac != 0) ? (s32)gFR_TAN_TAB_O[idx + 1] : lo;
+        raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+
+ /* raw is tan(complement) in u0.15. Compute 1/raw in s15.16.
+ * 1.0 in s15.16 = 0x10000. We want (1<<16) / (raw_in_0.15)
+ * = (1<<16) * (1<<15) / raw_raw = (1<<31) / raw.
+ * Use unsigned to avoid overflow: 0x80000000 / raw. */
+ if (raw < 2) {
+ /* Near pole: saturate */
+ raw = FR_TRIG_MAXVAL;
+ } else {
+ raw = (s32)(0x80000000u / (u32)raw);
+ }
+ }
+
+ return (sign < 0) ? -raw : raw;
+}
+
+/* fr_tan: returns tan at s15.16 (radix 16). Uses BAM-native table.
+ * At exact poles, fr_tan_bam's sign convention is based on BAM quadrant
+ * which loses the original approach direction. Fix up: if the result
+ * saturates, the sign should match the sign of the radian input. */
+s32 fr_tan(s32 rad, u16 radix)
+{
+ s32 result = fr_tan_bam(fr_rad_to_bam(rad, radix));
+ if (result == FR_TRIG_MAXVAL && rad < 0)
+ return -FR_TRIG_MAXVAL;
+ if (result == -FR_TRIG_MAXVAL && rad > 0)
+ return FR_TRIG_MAXVAL;
+ return result;
}
-s32 FR_Cos(s16 deg, u16 radix)
+/*=======================================================
+ * Integer-degree and fixed-radix-degree trig wrappers
+ */
+s32 FR_Cos(s32 deg, u16 radix)
{
- return fr_cos_bam(fr_deg_radix_to_bam(deg, radix));
+ u16 bam = (radix == 0) ? FR_DEG2BAM_I(deg) : (u16)((FR_DEG2BAM(deg) + (1 << (radix - 1))) >> radix);
+ return fr_cos_bam(bam);
}
-s32 FR_Sin(s16 deg, u16 radix)
+s32 FR_Sin(s32 deg, u16 radix)
{
- return fr_sin_bam(fr_deg_radix_to_bam(deg, radix));
+ u16 bam = (radix == 0) ? FR_DEG2BAM_I(deg) : (u16)((FR_DEG2BAM(deg) + (1 << (radix - 1))) >> radix);
+ return fr_sin_bam(bam);
}
-s32 FR_TanI(s16 deg)
+s32 FR_TanI(s32 deg)
{
- u16 bam = FR_DEG2BAM(deg);
- s32 s = fr_sin_bam(bam);
- s32 c = fr_cos_bam(bam);
- if (c == 0)
- return (s >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
- return (s32)(((int64_t)s << FR_TRIG_OUT_PREC) / c);
+ /* Exact pole: deg mod 180 == ±90. Sign matches input sign
+ * (positive deg → +MAXVAL, negative deg → -MAXVAL). */
+ s32 rem = deg % 180;
+ if (rem == 90 || rem == -90)
+ return (deg > 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+ return fr_tan_bam(FR_DEG2BAM_I(deg));
}
-s32 FR_Tan(s16 deg, u16 radix)
+s32 FR_Tan(s32 deg, u16 radix)
{
- u16 bam = fr_deg_radix_to_bam(deg, radix);
- s32 s = fr_sin_bam(bam);
- s32 c = fr_cos_bam(bam);
- if (c == 0)
- return (s >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
- return (s32)(((int64_t)s << FR_TRIG_OUT_PREC) / c);
+ /* Check for exact integer poles before using the shift-only DEG2BAM
+ * macro, which can map to the wrong BAM quadrant for large angles.
+ * Only trigger when fractional bits are zero (exact pole). */
+ s32 frac_mask = (1 << radix) - 1;
+ if ((deg & frac_mask) == 0) {
+ s32 deg_int = deg >> radix;
+ s32 rem = deg_int % 180;
+ if (rem == 90 || rem == -90)
+ return (deg >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+ }
+ u16 bam = (radix == 0) ? FR_DEG2BAM_I(deg) : (u16)((FR_DEG2BAM(deg) + (1 << (radix - 1))) >> radix);
+ return fr_tan_bam(bam);
}
/*=======================================================
@@ -275,7 +331,7 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix)
{
s32 one = (s32)1 << radix;
if (input_abs >= one)
- return sign ? FR_BAM2RAD(FR_BAM_HALF, out_radix) : 0;
+ return sign ? FR_CHRDX(FR_kPI, FR_kPREC, out_radix) : 0;
}
v = FR_CHRDX(input_abs, radix, FR_TRIG_PREC); /* |input| at s0.15 */
@@ -297,7 +353,7 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix)
s32 rad_native = FR_sqrt(two_omx, radix); /* radians at caller radix */
s32 rad_out = FR_CHRDX(rad_native, radix, out_radix);
if (sign)
- rad_out = FR_BAM2RAD(FR_BAM_HALF, out_radix) - rad_out;
+ rad_out = FR_CHRDX(FR_kPI, FR_kPREC, out_radix) - rad_out;
return rad_out;
}
@@ -362,7 +418,7 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix)
u16 bam = (u16)(((u32)idx << FR_TRIG_FRAC_BITS) + (u32)frac);
if (sign)
bam = (u16)(FR_BAM_HALF - bam); /* mirror: pi - angle */
- return FR_BAM2RAD(bam, out_radix);
+ return FR_CHRDX(FR_Q2RAD(bam), 14, out_radix);
}
}
@@ -370,7 +426,7 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix)
s32 FR_asin(s32 input, u16 radix, u16 out_radix)
{
/* asin(x) = pi/2 - acos(x) */
- s32 half_pi = FR_BAM2RAD(FR_BAM_QUADRANT, out_radix);
+ s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix);
return half_pi - FR_acos(input, radix, out_radix);
}
@@ -394,12 +450,12 @@ s32 FR_atan2(s32 y, s32 x, u16 out_radix)
/* Axis cases — exact angles, no divide. */
if (x == 0)
{
- if (y > 0) return FR_BAM2RAD(FR_BAM_QUADRANT, out_radix); /* pi/2 */
- if (y < 0) return -FR_BAM2RAD(FR_BAM_QUADRANT, out_radix); /* -pi/2 */
+ if (y > 0) return FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); /* pi/2 */
+ if (y < 0) return -FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); /* -pi/2 */
return 0;
}
if (y == 0)
- return (x > 0) ? 0 : FR_BAM2RAD(FR_BAM_HALF, out_radix); /* 0 or pi */
+ return (x > 0) ? 0 : FR_CHRDX(FR_kPI, FR_kPREC, out_radix); /* 0 or pi */
ax = (x < 0) ? -x : x;
ay = (y < 0) ? -y : y;
@@ -443,7 +499,7 @@ s32 FR_atan2(s32 y, s32 x, u16 out_radix)
if (cos_val < FR_ATAN2_SMALL)
{
/* angle ≈ pi/2 - cos_val (symmetric small-angle identity) */
- s32 half_pi = FR_BAM2RAD(FR_BAM_QUADRANT, out_radix);
+ s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix);
q1_angle = half_pi - FR_CHRDX(cos_val, FR_TRIG_PREC, out_radix);
}
else
@@ -453,7 +509,7 @@ s32 FR_atan2(s32 y, s32 x, u16 out_radix)
/* Apply quadrant from signs of x and y.
* q1_angle is always positive [0..pi/2]. */
{
- s32 pi = FR_BAM2RAD(FR_BAM_HALF, out_radix);
+ s32 pi = FR_CHRDX(FR_kPI, FR_kPREC, out_radix);
if (x > 0)
return (y > 0) ? q1_angle : -q1_angle;
/* x < 0: mirror across y-axis */
diff --git a/src/FR_math.h b/src/FR_math.h
index 6eff284..a251316 100644
--- a/src/FR_math.h
+++ b/src/FR_math.h
@@ -270,8 +270,8 @@ static inline s32 FR_div_rnd(s64 num, s32 den) {
#define FR_TRIG_OUT_PREC (16)
#define FR_TRIG_MASK ((1 << (FR_TRIG_PREC)) - 1)
#define FR_TRIG_ONE (1L << FR_TRIG_OUT_PREC) /* 65536 = 1.0 */
-#define FR_TRIG_MAXVAL ((s32)0x7fffffff) /* tan saturation */
-#define FR_TRIG_MINVAL (-FR_TRIG_MASK)
+#define FR_TRIG_MAXVAL ((s32)0x7fffffff) /* tan saturation max */
+#define FR_TRIG_MINVAL (-FR_TRIG_MAXVAL) /* tan saturation min */
/* Bit Shift Scaling macros. Useful on some platforms with poor MUL performance.
* Also can be useful if you need to scale numbers with
@@ -304,32 +304,108 @@ static inline s32 FR_div_rnd(s64 num, s32 den) {
/* scale by log2(10) 3.32192809489 used for converting pow2() to pow10 */
#define FR_SLOG2_10(x) (((x) << 1) + (x) + ((x) >> 2) + ((x) >> 4) + ((x) >> 7) + ((x) >> 10) + ((x) >> 11) + ((x) >> 13))
-/* TRIG Conversion macros
- * Convert degrees <--> radians <--> quadrants <--> degrees
- * no multiply (may reduce chances of overflow in certain circumstances)
- * works on all int types and radixes (pure ints will have trunc err)
- * radians = 2*pi per revolution
- * degrees = 360 per revolution
- * quadrants = 4 per revolution
- * freq = 1 per revolution
- */
-/* FR_DEG2RAD(x): multiply by pi/180 ≈ 0.017453 using shifts only.
- * Worst-case relative error: ~1.6e-4 (acceptable for embedded use; if you
- * need better precision, multiply by FR_kDEG2RAD and shift down by FR_kPREC).
- * Side-effect note: x is referenced 3 times, so do not pass an expression
- * with side effects.
+/* Shift-only angular conversion macros
+ *
+ * All are pure constant multipliers expressed as shifts — no multiply, no
+ * divide, no 64-bit intermediates, no accumulators. Work at any radix: if
+ * your input is degrees at radix 8, the output is the target unit at radix 8.
+ * The caller shifts as needed.
+ *
+ * Angular units:
+ * degrees = 360 per revolution
+ * radians = 2*pi per revolution
+ * BAM = 65536 per revolution (Binary Angular Measure, u16)
+ * quadrants = 4 per revolution (= BAM >> 14)
+ *
+ * Side-effect note: x is referenced multiple times in each macro — do not
+ * pass expressions with side effects.
*/
-#define FR_DEG2RAD(x) (((x) >> 6) + ((x) >> 9) - ((x) >> 13))
-/* FR_RAD2DEG(x): multiply by 180/pi ≈ 57.295780 using shifts only.
- * Worst-case relative error: ~2.1e-6.
- * Side-effect note: x is referenced 7 times.
- */
+/* FR_DEG2RAD(x): multiply by pi/180 ≈ 0.017453 (5 terms, ~17 bits) */
+#define FR_DEG2RAD(x) (((x) >> 6) + ((x) >> 9) - ((x) >> 13) - ((x) >> 19) - ((x) >> 20))
+
+/* FR_RAD2DEG(x): multiply by 180/pi ≈ 57.29578 (7 terms, ~19 bits) */
#define FR_RAD2DEG(x) (((x) << 6) - ((x) << 3) + (x) + ((x) >> 2) + (((x) >> 4) - ((x) >> 6)) - ((x) >> 10))
+/* FR_DEG2BAM(x): multiply by 65536/360 ≈ 182.0444 (7 terms summing to 182.0449, ~18 bits).
+ * CAUTION: overflows s32 when |x| > ~256 deg at s15.16 (x<<7 term).
+ * For safe conversion at any radix, use fr_deg_to_bam() instead. */
+#define FR_DEG2BAM(x) (((x)<<7)+((x)<<6)-((x)<<3)-((x)<<1)+((x)>>5)+((x)>>6)-((x)>>9))
+
+/* FR_BAM2DEG(x): multiply by 360/65536 = 0.00549316 (4 terms, exact) */
+#define FR_BAM2DEG(x) (((x)>>8)+((x)>>9)-((x)>>12)-((x)>>13))
+
+/* FR_RAD2BAM(x): multiply by 65536/(2*pi) ≈ 10430.378 (7 terms, ~21 bits).
+ * CAUTION: overflows s32 when |x| > ~4 rad at s15.16 (x<<13 term).
+ * For safe conversion at any radix, use fr_rad_to_bam() instead. */
+#define FR_RAD2BAM(x) (((x)<<13)+((x)<<11)+((x)<<7)+((x)<<6)-((x)<<1)+((x)>>1)-((x)>>3))
+
+/* ── Overflow-safe rad/deg to BAM conversion functions ─────────────
+ *
+ * These replace the FR_RAD2BAM / FR_DEG2BAM macros for callers that
+ * need the full ±2*pi or ±360° range at any radix.
+ *
+ * Strategy: normalize input to radix 16, conditionally reduce into
+ * a safe zone, apply the full-precision shift-only multiply, then
+ * extract the u16 BAM. No precision loss from halving/quartering.
+ *
+ * fr_rad_to_bam: reduce to [-pi, pi], reordered terms. ±2*pi safe.
+ * fr_deg_to_bam: reduce to [-90, 90) + quadrant offset. ±360° safe.
+ */
+
+/* Constants at radix 16 */
+#define FR_PI_R16 205887 /* round(pi * 65536) */
+#define FR_TWO_PI_R16 411775 /* round(2*pi * 65536) */
+#define FR_D90_R16 5898240 /* 90 * 65536 */
+#define FR_D180_R16 11796480 /* 180 * 65536 */
+#define FR_D360_R16 23592960 /* 360 * 65536 */
+
+static u16 __attribute__((unused)) fr_rad_to_bam(s32 rad, u16 radix)
+{
+    /* Radians at `radix` -> u16 BAM, rounded to nearest. Normalize to radix 16 first. */
+    s32 r = (radix > 16) ? (rad >> (radix - 16))
+          : (radix < 16) ? (rad << (16 - radix))
+          : rad;
+
+    /* Reduce to [-pi, pi] — one conditional pass, covers ±2*pi input */
+    if (r > FR_PI_R16) r -= FR_TWO_PI_R16;
+    if (r < -FR_PI_R16) r += FR_TWO_PI_R16;
+
+    /* Same 7-term decomposition as FR_RAD2BAM, reordered (negatives early)
+     * so every partial sum stays within s32 for |r| <= 205887 (pi).
+     * The 0.5 LSB rounding add is done in u32: near r == pi, bam + 0x8000 would overflow s32. */
+    s32 bam = (r<<13)-(r<<1)+(r<<11)-(r>>3)+(r<<7)+(r<<6)+(r>>1);
+    return (u16)(((u32)bam + ((u32)1 << 15)) >> 16);
+}
+
+static u16 __attribute__((unused)) fr_deg_to_bam(s32 deg, u16 radix)
+{
+ /* Degrees at `radix` -> u16 BAM, rounded to nearest. Normalize to radix 16 first. */
+ s32 d = (radix > 16) ? (deg >> (radix - 16))
+ : (radix < 16) ? (deg << (16 - radix))
+ : deg;
+
+ /* Reduce to [-180, 180) — one conditional pass, covers ±360 input */
+ if (d >= FR_D180_R16) d -= FR_D360_R16;
+ if (d < -FR_D180_R16) d += FR_D360_R16;
+
+ /* Reduce to [-90, 90) with BAM quadrant offset (32768 BAM = 180°).
+ * Needed because at ±180° (r16) the leading terms (d<<7)+(d<<6) = 192 * 11796480 exceed the s32 range. */
+ u16 offset = 0;
+ if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = 32768; }
+ else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = 32768; }
+
+ /* |d| <= 90° at r16. Max intermediate = 5898240 * 192 = 1.13B, safe. */
+ s32 bam = (d<<7)+(d<<6)-(d<<3)-(d<<1)+(d>>5)+(d>>6)-(d>>9);
+ return (u16)(offset + (u16)((bam + (1 << 15)) >> 16)); /* +0.5 LSB, keep integer BAM; the outer u16 cast wraps modulo a full turn */
+}
+
+/* FR_BAM2RAD(x): multiply by 2*pi/65536 ≈ 0.0000959 (5 terms, ~18 bits) */
+#define FR_BAM2RAD(x) (((x)>>13)-((x)>>15)+((x)>>18)+((x)>>21)+((x)>>25))
+
+/* Legacy quadrant macros (quadrants = BAM >> 14) */
#define FR_RAD2Q(x) (((x) >> 1) + ((x) >> 3) + ((x) >> 7) + ((x) >> 8) - ((x) >> 14))
#define FR_Q2RAD(x) ((x) + ((x) >> 1) + ((x) >> 4) + ((x) >> 7) + ((x) >> 11))
-
#define FR_DEG2Q(x) (((x) >> 6) - ((x) >> 8) - ((x) >> 11) - ((x) >> 13))
#define FR_Q2DEG(x) (((x) << 6) + ((x) << 4) + ((x) << 3) + ((x) << 1))
@@ -347,44 +423,12 @@ static inline s32 FR_div_rnd(s64 num, s32 den) {
* - The top 2 bits select the quadrant (no `% 360` modulo needed).
* - The next 7 bits index the 128-entry quadrant table directly.
* - The bottom 7 bits give linear-interpolation precision.
- *
- * All BAM macros are *macros* (not functions) so they evaluate inline and
- * cost nothing if you don't call them. Side-effect note: each macro
- * references its argument multiple times — do not pass an expression with
- * side effects.
*/
#define FR_BAM_BITS (16)
#define FR_BAM_FULL (1L << FR_BAM_BITS) /* 65536 */
#define FR_BAM_QUADRANT (FR_BAM_FULL >> 2) /* 16384 */
#define FR_BAM_HALF (FR_BAM_FULL >> 1) /* 32768 */
-/* Convert degrees -> BAM. Exact formula: deg * 65536 / 360.
- * Computed in s32; for s16-range deg the intermediate (deg << 16) fits.
- * The cast to u16 wraps modulo full circle, which is mathematically correct.
- * Side-effect note: deg is referenced twice for sign-aware rounding.
- *
- * Worst-case error: <= 0.5 LSB BAM (~0.0028 deg) per degree. No accumulation
- * across full circles.
- */
-#define FR_DEG2BAM(deg) ((u16)((((s32)(deg) << 16) + ((deg) >= 0 ? 180 : -180)) / 360))
-
-/* Convert BAM -> degrees. bam * (360 / 65536) ≈ bam * (45/8192).
- * Truncated; result is integer degrees.
- */
-#define FR_BAM2DEG(bam) ((s16)(((s32)(u16)(bam) * 45) >> 13))
-
-/* Convert radians (at given radix) -> BAM. rad * (65536 / (2*pi)) ≈ rad * 10430.378
- * For radix-16 input: ((rad * 10430) >> 16). Approximated; for high accuracy
- * combine with FR_kRAD2Q multiplier.
- */
-#define FR_RAD2BAM(rad, radix) ((u16)(((s32)(rad) * 10430L) >> (radix)))
-
-/* Convert BAM -> radians at the requested output radix.
- * Derivation: rad = bam * 2π / 65536. At output radix r: bam * 2π * 2^r / 2^16
- * = bam * (2π * 2^10) / 2^(26 - r) = bam * 6434 >> (26 - r).
- */
-#define FR_BAM2RAD(bam, radix) ((s32)(((s32)(u16)(bam) * 6434L) >> (26 - (radix))))
-
/*===============================================
* Radian-native and BAM-native trig (recommended)
*
@@ -406,12 +450,16 @@ static inline s32 FR_div_rnd(s64 num, s32 den) {
*/
s32 fr_cos_bam(u16 bam);
s32 fr_sin_bam(u16 bam);
+ s32 fr_tan_bam(u16 bam);
s32 fr_cos(s32 rad, u16 radix);
s32 fr_sin(s32 rad, u16 radix);
s32 fr_tan(s32 rad, u16 radix);
-#define fr_cos_deg(deg) fr_cos_bam(FR_DEG2BAM(deg))
-#define fr_sin_deg(deg) fr_sin_bam(FR_DEG2BAM(deg))
+/* Integer degrees -> BAM using division (exact at all multiples of 45 deg). deg is reduced mod 360 first so the scaled value fits s32 for any integer input; deg is evaluated twice. */
+#define FR_DEG2BAM_I(deg) ((u16)(((((s32)(deg) % 360) * 65536L) + ((deg) >= 0 ? 180 : -180)) / 360))
+
+#define fr_cos_deg(deg) fr_cos_bam(FR_DEG2BAM_I(deg))
+#define fr_sin_deg(deg) fr_sin_bam(FR_DEG2BAM_I(deg))
/*===============================================
* Integer-degree trig API (thin wrappers over the BAM-native path)
@@ -423,13 +471,13 @@ static inline s32 FR_div_rnd(s64 num, s32 den) {
* FR_Sin(deg, radix) — sin of fixed-radix degrees, s15.16 result
* FR_Tan(deg, radix) — tan of fixed-radix degrees, s15.16 result
*/
-#define FR_CosI(deg) fr_cos_bam(FR_DEG2BAM(deg))
-#define FR_SinI(deg) fr_sin_bam(FR_DEG2BAM(deg))
+#define FR_CosI(deg) fr_cos_bam(FR_DEG2BAM_I(deg))
+#define FR_SinI(deg) fr_sin_bam(FR_DEG2BAM_I(deg))
- s32 FR_Cos(s16 deg, u16 radix);
- s32 FR_Sin(s16 deg, u16 radix);
- s32 FR_TanI(s16 deg);
- s32 FR_Tan(s16 deg, u16 radix);
+ s32 FR_Cos(s32 deg, u16 radix);
+ s32 FR_Sin(s32 deg, u16 radix);
+ s32 FR_TanI(s32 deg);
+ s32 FR_Tan(s32 deg, u16 radix);
/* Inverse trig — output in radians at caller-specified radix (s32).
* FR_atan2 returns radians at radix 16 (s15.16).
diff --git a/src/FR_tan32.c b/src/FR_tan32.c
new file mode 100644
index 0000000..1f8fdec
--- /dev/null
+++ b/src/FR_tan32.c
@@ -0,0 +1,282 @@
+/**
+ * @file FR_tan32.c - division-free tangent and binary-search atan2
+ *
+ * fr_tan_bam32: hybrid tangent — table lookup + sin/cos near pole.
+ * 0-45°: direct u32 lerp from gFR_TAN_TAB_Q[0..64].
+ * 45-75°: variable-radix u16 mantissa + shift tables (no division).
+ * 75-90°: sin/cos ratio from cosine table (one s64 division).
+ *
+ * fr_tan_bam32_d64: full-range sin/cos ratio from cosine table.
+ * Kept for comparison. One s64 division per call.
+ *
+ * fr_atan2_32: binary search on the 129-entry u32 tan quadrant table
+ * (gFR_TAN_TAB_Q), then quadrant mapping.
+ *
+ * @copy Copyright (C) <2001-2026>
+ * @author M A Chatterjee
+ *
+ */
+
+#include "FR_math.h"
+#include "FR_trig_table.h"
+#include "FR_tan_table.h"
+
+#ifndef FR_NO_STDINT
+#include <stdint.h>
+#endif
+
+/*=======================================================
+ * cos_lerp_full — interpolated cosine from the 129-entry quadrant table.
+ *
+ * Returns cos(inq) in high-precision fixed-point (7 extra frac bits).
+ * Used internally by fr_tan_bam32 for the 75°-90° sin/cos path and
+ * by fr_tan_bam32_d64 for the full-range sin/cos path.
+ */
+static s32 cos_lerp_full(u32 inq)
+{
+    const u32 cell = inq >> FR_TRIG_FRAC_BITS;        /* table index */
+    const u32 sub = inq & FR_TRIG_FRAC_MASK;          /* position inside the cell */
+    const s32 base = gFR_COS_TAB_Q[cell];
+    const s32 drop = base - gFR_COS_TAB_Q[cell + 1];  /* fall to the next entry */
+    return (base << FR_TRIG_FRAC_BITS) - drop * (s32)sub;
+}
+
+/*=======================================================
+ * fr_tan_bam32 — hybrid tangent: table lookup + sin/cos near pole.
+ *
+ * Three zones:
+ * 0°-45°: direct u32 lerp from gFR_TAN_TAB_Q[0..64].
+ * 7-bit index + 7-bit frac. All u32, no division.
+ *
+ * 45°-75°: variable-radix u16 mantissa + u8 shift tables
+ * (gFR_TAN_MANT_Q2 / gFR_TAN_SHIFT_Q2).
+ * All u32, no division.
+ *
+ * 75°-90°: sin/cos ratio via the 129-entry cosine table.
+ * One s64 division. Handles the pole accurately.
+ *
+ * Poles: ±FR_TRIG_MAXVAL (90° = +, 270° = -).
+ * Result: s32 at radix 16 (s15.16).
+ */
+#define FR_TAN_OCT_HALF (1 << 13) /* 8192 = 45 deg in BAM quadrant */
+#define FR_TAN_D64_THRESH ((u32)(75.0 / 90.0 * 16384 + 0.5)) /* 13653 */
+
+s32 fr_tan_bam32(u16 bam)
+{
+ u32 q = ((u32)bam >> 14) & 0x3; /* quadrant: top 2 bits of the BAM angle */
+ u32 inq = (u32)bam & 0x3FFFu; /* 14-bit position inside the quadrant */
+ s32 sign = (q & 1) ? -1 : 1; /* result is negated in quadrants 1 and 3 */
+
+ /* Poles: exactly 90° or 270° */
+ if (inq == 0 && (q & 1))
+ return (q == 1) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+
+ if (q & 1)
+ inq = 0x4000u - inq; /* mirror odd quadrants onto (0°, 90°) */
+
+ u32 raw; /* unsigned magnitude in s15.16; sign applied on return */
+
+ if (inq <= FR_TAN_OCT_HALF) {
+ /* First octant (0°-45°): direct u32 table lookup */
+ u32 idx = inq >> FR_TAN32_FRAC_BITS;
+ u32 frac = inq & FR_TAN32_FRAC_MASK;
+ u32 lo = gFR_TAN_TAB_Q[idx];
+ u32 delta = gFR_TAN_TAB_Q[idx + 1] - lo;
+ raw = lo + ((delta * frac) >> FR_TAN32_FRAC_BITS); /* truncating lerp (no +half term) */
+ } else if (inq < FR_TAN_D64_THRESH) {
+ /* Second octant 45°-75°: variable-radix u16+shift */
+ u32 oct2 = inq - FR_TAN_OCT_HALF; /* offset past 45°; the Q2 tables start there */
+ u32 idx = oct2 >> FR_TAN32_FRAC_BITS;
+ u32 frac = oct2 & FR_TAN32_FRAC_MASK;
+
+ u32 m_lo = gFR_TAN_MANT_Q2[idx];
+ u32 m_hi = gFR_TAN_MANT_Q2[idx + 1];
+ u32 s_lo = gFR_TAN_SHIFT_Q2[idx];
+ u32 s_hi = gFR_TAN_SHIFT_Q2[idx + 1];
+ u32 s_max = (s_hi > s_lo) ? s_hi : s_lo; /* common scale for both endpoints */
+
+ u32 a_lo = m_lo >> (s_max - s_lo); /* align both mantissas to the larger shift */
+ u32 a_hi = m_hi >> (s_max - s_hi);
+ u32 delta = a_hi - a_lo;
+
+ raw = (a_lo + ((delta * frac) >> FR_TAN32_FRAC_BITS)) << s_max; /* lerp at the common scale, then restore s15.16 */
+ } else {
+ /* 75°-90°: sin/cos ratio from cosine table (one s64 division) */
+ s32 cos_val = cos_lerp_full(inq);
+ s32 sin_val = cos_lerp_full(FR_TAN32_QUADRANT - inq); /* sin(x) = cos(90° - x) */
+ if (cos_val == 0)
+ raw = (u32)FR_TRIG_MAXVAL;
+ else
+ raw = (u32)((((s64)sin_val << 16) + ((s64)cos_val >> 1)) / (s64)cos_val); /* + cos/2 rounds the quotient to nearest */
+ }
+
+ return (sign < 0) ? -(s32)raw : (s32)raw;
+}
+
+/*=======================================================
+ * fr_tan_bam32_d64 — tangent via sin/cos from the cosine table.
+ *
+ * Full-range sin/cos implementation kept for comparison.
+ * Computes sin(x)/cos(x) using the 129-entry cosine quadrant table.
+ * One s64 division per call.
+ */
+s32 fr_tan_bam32_d64(u16 bam)
+{
+    const u32 quad = ((u32)bam >> 14) & 0x3;        /* quadrant: top 2 bits */
+    u32 phase = (u32)bam & 0x3FFFu;                  /* 14-bit angle inside the quadrant */
+    const int mirrored = (quad == 1 || quad == 3);   /* odd quadrants: reflect, negate */
+    s32 num, den;
+    s32 mag;
+
+    /* Quadrant boundaries need no table work:
+     * 0 at 0°/180°, saturated poles at 90°/270°. */
+    if (phase == 0) {
+        if (!mirrored)
+            return 0;
+        return (quad == 1) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+    }
+
+    /* Fold odd quadrants onto (0°, 90°); the sign is restored on return. */
+    if (mirrored)
+        phase = 0x4000u - phase;
+
+    den = cos_lerp_full(phase);                      /* cos(x) */
+    num = cos_lerp_full(FR_TAN32_QUADRANT - phase);  /* sin(x) = cos(90° - x) */
+
+    /* sin/cos as s15.16, rounded to nearest; a zero denominator saturates. */
+    mag = (den == 0) ? FR_TRIG_MAXVAL
+                     : (s32)((((s64)num << 16) + ((s64)den >> 1)) / (s64)den);
+
+    return mirrored ? -mag : mag;
+}
+
+/* fr_tan32: tan from radians at caller-specified radix. s15.16 result. */
+s32 fr_tan32(s32 rad, u16 radix)
+{
+ return fr_tan_bam32(fr_rad_to_bam(rad, radix));
+}
+
+/* fr_tan_deg32: tan from degrees at caller-specified radix. s15.16 result.
+ * radix 0 = integer degrees, reduced mod 360 so any s32 value maps to the right
+ * angle (an s16 cast would wrap mod 65536); radix > 0 = fixed-point degrees via fr_deg_to_bam. */
+s32 fr_tan_deg32(s32 deg, u16 radix)
+{
+    u16 bam = (radix == 0) ? FR_DEG2BAM_I(deg % 360)
+                           : fr_deg_to_bam(deg, radix);
+    return fr_tan_bam32(bam);
+}
+
+/*=======================================================
+ * fr_atan_bam32 - Arctangent via binary search on the tan table.
+ *
+ * Input: positive ratio in s15.16 (caller handles signs/quadrants).
+ * Output: BAM angle (u16) in [0, 0x4000) representing [0, 90 deg).
+ *
+ * Algorithm:
+ * 1. If x <= 0: return 0.
+ * 2. If x >= table[127] (tan of ~89.3 deg): saturate to the largest in-quadrant BAM, 0x3FFF (just under 90 deg).
+ * 3. Binary search: 7 iterations on 128 entries to bracket.
+ * 4. Linear interpolation within bracket for 7 fractional bits.
+ * 5. Assemble: bam = (idx << 7) | frac.
+ */
+static u16 fr_atan_bam32(s32 x)
+{
+ s32 lo, hi, mid;
+ s32 idx, d, num, frac;
+ u32 ux;
+
+ if (x <= 0)
+ return 0; /* zero or negative ratio maps to angle 0 */
+
+ ux = (u32)x; /* table entries are u32, compare unsigned */
+
+ /* Saturate near the pole */
+ if (ux >= gFR_TAN_TAB_Q[127])
+ return (u16)((127u << FR_TAN32_FRAC_BITS) + FR_TAN32_FRAC_MASK); /* 0x3FFF: last BAM step below 90 deg */
+
+ /* Binary search: find lo such that table[lo] <= ux < table[lo+1].
+ * The table is monotonically increasing. */
+ lo = 0;
+ hi = 127;
+ while (lo < hi) {
+ mid = (lo + hi + 1) >> 1; /* round up so that lo = mid always makes progress */
+ if (gFR_TAN_TAB_Q[mid] <= ux)
+ lo = mid;
+ else
+ hi = mid - 1;
+ }
+
+ /* lo is now the index where table[lo] <= ux < table[lo+1]. */
+ idx = lo;
+
+ /* Linear interpolation within the bracket */
+ d = (s32)(gFR_TAN_TAB_Q[idx + 1] - gFR_TAN_TAB_Q[idx]); /* bracket width */
+ num = (s32)(ux - gFR_TAN_TAB_Q[idx]); /* distance of ux above the lower entry */
+ if (d > 0)
+ frac = (s32)(((s64)num << FR_TAN32_FRAC_BITS) / d); /* truncating inverse lerp */
+ else
+ frac = 0; /* guard against a zero-width bracket */
+
+ if (frac > FR_TAN32_FRAC_MASK)
+ frac = FR_TAN32_FRAC_MASK; /* keep frac inside its 7-bit field */
+
+ return (u16)(((u32)idx << FR_TAN32_FRAC_BITS) + (u32)frac);
+}
+
+/*=======================================================
+ * fr_atan2_32 - Full-circle atan2 using the tan table binary search.
+ *
+ * Input: y, x as s32 values at radix 16 (s15.16).
+ * Output: radians at out_radix.
+ * Range: [-pi, pi]. Returns 0 for atan2(0, 0).
+ *
+ * Algorithm:
+ * 1. Handle axis cases.
+ * 2. Compute ratio = |y| / |x| or |x| / |y| (whichever <= 1.0) in s15.16.
+ * 3. Binary search -> BAM angle in [0, pi/4].
+ * 4. If |y| > |x|: angle = pi/2 - angle.
+ * 5. Apply quadrant from signs of x and y.
+ */
+s32 fr_atan2_32(s32 y, s32 x, u16 out_radix)
+{
+ s32 ax, ay, ratio;
+ u16 bam;
+ s32 angle;
+ s32 pi, half_pi;
+
+ pi = FR_CHRDX(FR_kPI, FR_kPREC, out_radix); /* pi at out_radix */
+ half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); /* pi/2 at out_radix */
+
+ /* Axis cases */
+ if (x == 0) {
+ if (y > 0) return half_pi;
+ if (y < 0) return -half_pi;
+ return 0; /* atan2(0, 0) is defined as 0 here */
+ }
+ if (y == 0)
+ return (x > 0) ? 0 : pi;
+
+ ax = (x < 0) ? -x : x; /* NOTE(review): the most negative s32 cannot be negated; not handled here */
+ ay = (y < 0) ? -y : y;
+
+ /* Compute ratio in s15.16. Use the smaller/larger to stay in [0, 1.0]
+ * for the initial lookup, then complement if needed. */
+ if (ay <= ax) {
+ /* angle in [0, 45 deg]: ratio = ay/ax */
+ ratio = (s32)(((s64)ay << 16) / ax); /* s64 numerator keeps ay << 16 from overflowing */
+ bam = fr_atan_bam32(ratio);
+ /* Convert BAM to radians at out_radix */
+ angle = FR_CHRDX(FR_Q2RAD(bam), 14, out_radix); /* bam has 2^14 steps per quadrant, so FR_Q2RAD(bam) is radians at radix 14 */
+ } else {
+ /* angle in (45, 90 deg): ratio = ax/ay, angle = pi/2 - atan(ratio) */
+ ratio = (s32)(((s64)ax << 16) / ay);
+ bam = fr_atan_bam32(ratio);
+ angle = half_pi - FR_CHRDX(FR_Q2RAD(bam), 14, out_radix);
+ }
+
+ /* Apply quadrant from signs of x and y */
+ if (x > 0)
+ return (y > 0) ? angle : -angle;
+ else
+ return (y > 0) ? (pi - angle) : (angle - pi); /* x < 0: mirror across the y-axis */
+}
diff --git a/src/FR_tan_table.h b/src/FR_tan_table.h
new file mode 100644
index 0000000..bdee54c
--- /dev/null
+++ b/src/FR_tan_table.h
@@ -0,0 +1,115 @@
+/**
+ * @file FR_tan_table.h - tangent quadrant tables (u32, s15.16)
+ *
+ * Master table: gFR_TAN_TAB_Q[129]
+ * 129 entries covering [0, pi/2] in s15.16 fixed-point.
+ * table[i] = round(tan(i * pi/2 / 128) * 65536), i=0..127
+ * table[128] = 0x7FFFFFFF (pole saturation)
+ * 7-bit index + 7-bit lerp from 14-bit in-quadrant BAM.
+ *
+ * Used by:
+ * fr_tan_bam32(): entries 0-64 directly (first octant, 0°-45°)
+ * fr_atan_bam32(): all 129 entries for binary-search arctangent
+ *
+ * Second-octant variable-radix tables (derived from entries 64-128):
+ * gFR_TAN_MANT_Q2[65]: u16 mantissa (top 16 bits)
+ * gFR_TAN_SHIFT_Q2[65]: u8 shift (bits to left-shift)
+ * Used by fr_tan_bam32() for its division-free 45°-75° zone (75°-90° uses the sin/cos division path).
+ *
+ * Total ROM: 129×4 + 65×2 + 65×1 = 711 bytes
+ *
+ * @copy Copyright (C) <2001-2026>
+ * @author M A Chatterjee
+ *
+ * Same zlib license as the rest of the library.
+ */
+#ifndef __FR_TAN_TABLE_H__
+#define __FR_TAN_TABLE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef __FR_Platform_Defs_H__
+#include "FR_defs.h"
+#endif
+
+/* ── 129-entry table (used by atan binary search) ───────── */
+#define FR_TAN32_TABLE_BITS (7)
+#define FR_TAN32_TABLE_SIZE ((1 << FR_TAN32_TABLE_BITS) + 1) /* 129 */
+#define FR_TAN32_FRAC_BITS (14 - FR_TAN32_TABLE_BITS) /* 7 */
+#define FR_TAN32_FRAC_MAX (1 << FR_TAN32_FRAC_BITS) /* 128 */
+#define FR_TAN32_FRAC_MASK (FR_TAN32_FRAC_MAX - 1) /* 0x7F */
+#define FR_TAN32_FRAC_HALF (FR_TAN32_FRAC_MAX >> 1) /* 64 */
+#define FR_TAN32_QUADRANT (1 << 14) /* 16384 */
+
+static const u32 gFR_TAN_TAB_Q[FR_TAN32_TABLE_SIZE] = {
+ 0, 804, 1609, 2414,
+ 3220, 4026, 4834, 5644,
+ 6455, 7268, 8083, 8901,
+ 9721, 10545, 11372, 12202,
+ 13036, 13874, 14717, 15564,
+ 16416, 17273, 18136, 19005,
+ 19880, 20762, 21650, 22546,
+ 23449, 24360, 25280, 26208,
+ 27146, 28093, 29050, 30018,
+ 30996, 31986, 32988, 34002,
+ 35030, 36071, 37126, 38196,
+ 39281, 40382, 41500, 42636,
+ 43790, 44963, 46156, 47369,
+ 48605, 49863, 51145, 52451,
+ 53784, 55144, 56532, 57950,
+ 59398, 60880, 62395, 63947,
+ 65536, 67165, 68835, 70548,
+ 72308, 74116, 75974, 77887,
+ 79856, 81885, 83977, 86135,
+ 88365, 90670, 93054, 95523,
+ 98082, 100736, 103493, 106358,
+ 109340, 112447, 115687, 119071,
+ 122609, 126314, 130198, 134276,
+ 138564, 143081, 147847, 152884,
+ 158218, 163878, 169896, 176309,
+ 183161, 190499, 198380, 206870,
+ 216043, 225990, 236817, 248648,
+ 261634, 275959, 291845, 309568,
+ 329472, 351993, 377693, 407305,
+ 441808, 482534, 531352, 590958,
+ 665398, 761030, 888450, 1066730,
+ 1334016, 1779314, 2669641, 5340086,
+ 2147483647
+};
+
+/* ── Second-octant variable-radix tables (used by forward tan) ── */
+
+/* Mantissa table: top 16 bits of gFR_TAN_TAB_Q[64..128].
+ * gFR_TAN_MANT_Q2[i] = gFR_TAN_TAB_Q[64+i] >> gFR_TAN_SHIFT_Q2[i]
+ * 65 entries × 2 bytes = 130 bytes ROM.
+ */
+static const u16 gFR_TAN_MANT_Q2[65] = {
+ 32768, 33582, 34417, 35274, 36154, 37058, 37987, 38943,
+ 39928, 40942, 41988, 43067, 44182, 45335, 46527, 47761,
+ 49041, 50368, 51746, 53179, 54670, 56223, 57843, 59535,
+ 61304, 63157, 65099, 33569, 34641, 35770, 36961, 38221,
+ 39554, 40969, 42474, 44077, 45790, 47624, 49595, 51717,
+ 54010, 56497, 59204, 62162, 65408, 34494, 36480, 38696,
+ 41184, 43999, 47211, 50913, 55226, 60316, 33209, 36934,
+ 41587, 47564, 55528, 33335, 41688, 55603, 41713, 41719,
+ 65535
+};
+
+/* Shift table: bits to left-shift mantissa to reconstruct s15.16 value.
+ * 65 entries × 1 byte = 65 bytes ROM.
+ */
+static const u8 gFR_TAN_SHIFT_Q2[65] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 6, 7,
+ 15
+};
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif /* __FR_TAN_TABLE_H__ */
diff --git a/src/FR_trig_table.h b/src/FR_trig_table.h
index 03a34cd..983c4e2 100644
--- a/src/FR_trig_table.h
+++ b/src/FR_trig_table.h
@@ -60,10 +60,45 @@ static const short gFR_COS_TAB_Q[FR_TRIG_TABLE_SIZE] = {
12539, 12167, 11793, 11417, 11039, 10659, 10278, 9896,
9512, 9126, 8739, 8351, 7962, 7571, 7179, 6786,
6393, 5998, 5602, 5205, 4808, 4410, 4011, 3612,
- 3212, 2811, 2410, 2009, 1608, 1206, 804, 402,
+ 3212, 2811, 2410, 2009, 1608, 1206, 804, 401,
0
};
+/* ---- Tangent table: 65 entries covering one octant [0, pi/4] ----
+ *
+ * gFR_TAN_TAB_O[i] = round(tan(i * pi/4 / 64) * 32768) for i = 0..64
+ *
+ * Output format: u0.15 stored as u16 (unsigned, 15 fractional bits).
+ * Entry[64] = 32768 (tan 45° = 1.0 exactly) requires u16; it does not
+ * fit in s16.
+ *
+ * The table is used by fr_tan_bam() in FR_math.c. The first-octant
+ * lookup gives a raw result in u0.15 which is then shifted to s15.16.
+ * Second-octant (>45°) uses the reciprocal identity:
+ * tan(pi/2 - x) = 1/tan(x)
+ *
+ * 130 bytes ROM.
+ */
+#define FR_TAN_TABLE_BITS (6)
+#define FR_TAN_TABLE_SIZE ((1 << FR_TAN_TABLE_BITS) + 1) /* 65 */
+#define FR_TAN_FRAC_BITS (13 - FR_TAN_TABLE_BITS) /* 7 */
+#define FR_TAN_FRAC_MAX (1 << FR_TAN_FRAC_BITS) /* 128 */
+#define FR_TAN_FRAC_MASK (FR_TAN_FRAC_MAX - 1) /* 0x7F */
+#define FR_TAN_FRAC_HALF (FR_TAN_FRAC_MAX >> 1) /* 64 */
+#define FR_TAN_OCTANT (1 << 13) /* 8192 */
+
+static const unsigned short gFR_TAN_TAB_O[FR_TAN_TABLE_SIZE] = {
+ 0, 402, 804, 1207, 1610, 2013, 2417, 2822,
+ 3227, 3634, 4042, 4450, 4861, 5272, 5686, 6101,
+ 6518, 6937, 7358, 7782, 8208, 8637, 9068, 9503,
+ 9940, 10381, 10825, 11273, 11725, 12180, 12640, 13104,
+ 13573, 14046, 14525, 15009, 15498, 15993, 16494, 17001,
+ 17515, 18035, 18563, 19098, 19640, 20191, 20750, 21318,
+ 21895, 22481, 23078, 23685, 24302, 24931, 25572, 26226,
+ 26892, 27572, 28266, 28975, 29699, 30440, 31198, 31973,
+ 32768
+};
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/tests/test_full_coverage.c b/tests/test_full_coverage.c
index 0dfd248..36c00f0 100644
--- a/tests/test_full_coverage.c
+++ b/tests/test_full_coverage.c
@@ -188,7 +188,7 @@ int test_div() {
int test_trig_complete() {
s16 result;
s32 result32;
-
+
/* Test CosI with all quadrants and edge cases */
result = FR_CosI(0);
result = FR_CosI(45);
@@ -199,51 +199,113 @@ int test_trig_complete() {
result = FR_CosI(270);
result = FR_CosI(315);
result = FR_CosI(360);
-
+
/* Test angles > 180 to hit the branch */
result = FR_CosI(200); /* > 180, will subtract 360 */
result = FR_CosI(350); /* > 180, will subtract 360 */
-
+
/* Test angles < -180 to hit that branch */
result = FR_CosI(-200); /* < -180, will add 360 */
result = FR_CosI(-350); /* < -180, will add 360 */
-
+
/* Test SinI */
result = FR_SinI(0);
result = FR_SinI(90);
result = FR_SinI(180);
result = FR_SinI(270);
-
+
/* Test FR_Cos with radix (interpolated) */
result = FR_Cos(45, 8);
result = FR_Cos(90, 8);
result = FR_Cos(180, 8);
-
+
/* Test FR_Sin with radix */
result = FR_Sin(45, 8);
result = FR_Sin(90, 8);
-
+
/* Test TanI with all special cases */
result32 = FR_TanI(0);
+ if (result32 != 0) return TEST_FAIL; /* tan(0°) = 0 */
result32 = FR_TanI(45);
- result32 = FR_TanI(90); /* Special case: returns max */
+ if (result32 != 65536) return TEST_FAIL; /* tan(45°) = 1.0 = 65536 */
+ result32 = FR_TanI(90);
+ if (result32 != FR_TRIG_MAXVAL) return TEST_FAIL; /* pole: +max */
result32 = FR_TanI(135);
+ if (result32 != -65536) return TEST_FAIL; /* tan(135°) = -1.0 */
result32 = FR_TanI(180);
- result32 = FR_TanI(270); /* Special case: returns -max */
- result32 = FR_TanI(-45); /* Negative angle */
- result32 = FR_TanI(-90); /* Negative 90 */
+ if (result32 != 0) return TEST_FAIL; /* tan(180°) = 0 */
+ result32 = FR_TanI(270);
+ if (result32 != FR_TRIG_MAXVAL) return TEST_FAIL; /* pole: +max (positive deg) */
+ result32 = FR_TanI(-45);
+ if (result32 != -65536) return TEST_FAIL; /* tan(-45°) = -1.0 */
+ result32 = FR_TanI(-90);
+ if (result32 != -FR_TRIG_MAXVAL) return TEST_FAIL; /* pole: -max */
result32 = FR_TanI(200); /* > 180 */
result32 = FR_TanI(-200); /* < -180 */
-
+
/* Test FR_Tan with radix */
result32 = FR_Tan(45, 8);
result32 = FR_Tan(30, 8);
-
+
(void)result;
(void)result32;
return TEST_PASS;
}
+/* Test fr_tan_bam (BAM-native tangent): zeros, poles, ±1.0 points and tolerance bands */
+int test_tan_bam() {
+ s32 result;
+
+ /* Exact zeros: 0° and 180° */
+ result = fr_tan_bam(0); /* 0° */
+ if (result != 0) return TEST_FAIL;
+ result = fr_tan_bam(0x8000); /* 180° */
+ if (result != 0) return TEST_FAIL;
+
+ /* Exact poles: 90° and 270° */
+ result = fr_tan_bam(0x4000); /* 90° = +pole */
+ if (result != FR_TRIG_MAXVAL) return TEST_FAIL;
+ result = fr_tan_bam(0xC000); /* 270° = -pole */
+ if (result != -FR_TRIG_MAXVAL) return TEST_FAIL;
+
+ /* 45° = 0x2000: tan(45°) = 1.0 = 65536 in s15.16 */
+ result = fr_tan_bam(0x2000);
+ if (result != 65536) return TEST_FAIL;
+
+ /* 135° = 0x6000: tan(135°) = -1.0 */
+ result = fr_tan_bam(0x6000);
+ if (result != -65536) return TEST_FAIL;
+
+ /* 225° = 0xA000: tan(225°) = 1.0 (same as 45°) */
+ result = fr_tan_bam(0xA000);
+ if (result != 65536) return TEST_FAIL;
+
+ /* 315° = 0xE000: tan(315°) = -1.0 */
+ result = fr_tan_bam(0xE000);
+ if (result != -65536) return TEST_FAIL;
+
+ /* 30° ≈ BAM 5461: tan(30°) = 1/sqrt(3) ≈ 0.57735 → 37837 in s15.16
+ * Accepted band is 37700..37950 (about -137/+113 LSB) to cover interpolation error */
+ result = fr_tan_bam(5461);
+ if (result < 37700 || result > 37950) return TEST_FAIL;
+
+ /* 60° ≈ BAM 10923: tan(60°) = sqrt(3) ≈ 1.73205 → 113512 in s15.16
+ * This exercises the second-octant (reciprocal) path. Band is 113200..113800 (≈ ±300 LSB). */
+ result = fr_tan_bam(10923);
+ if (result < 113200 || result > 113800) return TEST_FAIL;
+
+ /* Near-pole: 89° ≈ BAM 16202: tan(89°) ≈ 57.29 → huge.
+ * Just verify it's large and positive. */
+ result = fr_tan_bam(16202);
+ if (result < 3000000) return TEST_FAIL; /* > 45.8 in s15.16 */
+
+ /* Near-pole: 91° ≈ BAM 16566: tan(91°) ≈ -57.29 → large negative */
+ result = fr_tan_bam(16566);
+ if (result > -3000000) return TEST_FAIL;
+
+ return TEST_PASS;
+}
+
/* Test inverse trig functions */
int test_inverse_trig() {
s32 result, input;
@@ -748,8 +810,8 @@ int test_edge_branches() {
* cos==0 and we hit the saturation return. */
r32 = FR_Tan(90, 0); /* bam=16384 (sin>0) */
if (r32 != FR_TRIG_MAXVAL) return TEST_FAIL;
- r32 = FR_Tan(270, 0); /* bam=49152 (sin<0) */
- if (r32 != -FR_TRIG_MAXVAL) return TEST_FAIL;
+ r32 = FR_Tan(270, 0); /* pole: positive deg → +MAXVAL */
+ if (r32 != FR_TRIG_MAXVAL) return TEST_FAIL;
/* FR_atan2 now returns radians at out_radix.
* At radix 16: pi/2 ≈ 102944, pi ≈ 205887.
@@ -1031,6 +1093,7 @@ int main() {
printf("\nTrigonometry (Complete):\n");
RUN_TEST(test_trig_complete);
+ RUN_TEST(test_tan_bam);
RUN_TEST(test_inverse_trig);
printf("\nLogarithms & Powers (Complete):\n");
diff --git a/tests/test_full_sweep.c b/tests/test_full_sweep.c
new file mode 100644
index 0000000..64d7365
--- /dev/null
+++ b/tests/test_full_sweep.c
@@ -0,0 +1,346 @@
+/**
+ * test_full_sweep.c — exhaustive error sweep for cos and tan (old & new)
+ *
+ * Three independent sweeps, each in its native input domain:
+ * BAM: all 65536 u16 values (0..65535)
+ * Radian: every s15.16 LSB from -2pi to +2pi (~823k values)
+ * Degree: fr_tan_deg32(s32,16) at s15.16, 1/1024 deg steps, ±360 deg (~738k)
+ * FR_Tan(s16,6) at s9.6 for old (s16 limits range)
+ * FR_TanI(deg) tested at integer-degree-aligned subset
+ *
+ * Error metrics:
+ * cos: % of full scale (1.0). |comp/65536 - ref| * 100
+ * tan: relative % when |ref| >= 0.01, else absolute % of 1.0
+ * Skipped when |ref| > 1000 (near-pole, unrepresentable in s15.16)
+ *
+ * Also reports average ns/call for each function.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <time.h>
+#include "FR_math.h"
+#include "FR_trig_table.h"
+#include "FR_tan_table.h"
+
+/* FR_tan32.c functions */
+extern s32 fr_tan_bam32(u16 bam);
+extern s32 fr_tan32(s32 rad, u16 radix);
+extern s32 fr_tan_deg32(s32 deg, u16 radix);
+
+/* ── sweep accumulator ─────────────────────────────── */
+
+typedef struct {
+ const char *name;
+ double peak_err;
+ double ref_at_peak;
+ s32 val_at_peak;
+ double sum_err;
+ long count;
+ char peak_label[64];
+ double total_ns;
+ long time_count;
+} sweep_t;
+
+static void sw_init(sweep_t *s, const char *name)
+{
+ memset(s, 0, sizeof(*s));
+ s->name = name;
+}
+
+static void sw_cos(sweep_t *s, double ref, s32 comp, const char *label)
+{
+    /* error as a percentage of full scale (comp is s15.16, so 65536 == 1.0) */
+    const double err_pct = fabs((double)comp / 65536.0 - ref) * 100.0;
+    s->count += 1;
+    s->sum_err += err_pct;
+    if (!(err_pct > s->peak_err)) return;   /* not a new worst case */
+    /* new peak: remember where it happened (copies at most size-1 label bytes) */
+    strncpy(s->peak_label, label, sizeof(s->peak_label) - 1);
+    s->val_at_peak = comp;
+    s->ref_at_peak = ref;
+    s->peak_err = err_pct;
+}
+
+#define TAN_CLIP 1000.0
+#define TAN_ZERO 0.01
+
+static void sw_tan(sweep_t *s, double ref, s32 comp, const char *label)
+{
+    const double mag = fabs(ref);
+    double err;
+    if (mag > TAN_CLIP) return;              /* near a pole: sample is not scored */
+    err = fabs((double)comp / 65536.0 - ref);
+    if (mag >= TAN_ZERO) err /= mag;         /* relative error away from zero */
+    err *= 100.0;                            /* express as a percentage */
+    s->count += 1;
+    s->sum_err += err;
+    if (!(err > s->peak_err)) return;        /* not a new worst case */
+    /* record the new peak; copies at most size-1 bytes of the label */
+    strncpy(s->peak_label, label, sizeof(s->peak_label) - 1);
+    s->val_at_peak = comp;
+    s->ref_at_peak = ref;
+    s->peak_err = err;
+}
+
+static double now_ns(void)
+{
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return (double)ts.tv_sec * 1e9 + (double)ts.tv_nsec;
+}
+
+static void sw_header(void)
+{
+ printf("| %-26s | %10s | %10s | %7s | %-30s | %12s | %10s |\n",
+ "Function", "Peak Err", "Avg Err", "ns/call",
+ "Peak At", "Ref Value", "Got (s32)");
+ printf("| %-26s | %10s | %10s | %7s | %-30s | %12s | %10s |\n",
+ "--------------------------", "----------", "----------", "-------",
+ "------------------------------", "------------", "----------");
+}
+
+static void sw_print(const sweep_t *s)
+{
+    double mean_err = 0.0, ns_per_call = 0.0;   /* stay 0 when nothing was sampled/timed */
+    if (s->count > 0)      mean_err    = s->sum_err  / (double)s->count;
+    if (s->time_count > 0) ns_per_call = s->total_ns / (double)s->time_count;
+    printf("| %-26s | %9.4f%% | %9.5f%% | %5.1f | %-30s | %12.6f | %10d |\n", s->name,
+           s->peak_err, mean_err, ns_per_call, s->peak_label, s->ref_at_peak, (int)s->val_at_peak);
+}
+
+/* ════════════════════════════════════════════════════════
+ * BAM sweep: all 65536 u16 values
+ * ════════════════════════════════════════════════════════ */
+static void sweep_bam(void)
+{
+ sweep_t cos_old, tan_old, tan_new;
+ sw_init(&cos_old, "fr_cos_bam");
+ sw_init(&tan_old, "fr_tan_bam (old)");
+ sw_init(&tan_new, "fr_tan_bam32 (new)");
+
+ for (long b = 0; b < 65536; b++) {
+ u16 bam = (u16)b;
+ double rad = (double)bam * 2.0 * M_PI / 65536.0;
+ char label[64];
+ snprintf(label, sizeof(label), "BAM %5u (%7.2f deg)",
+ bam, (double)bam * 360.0 / 65536.0);
+
+ sw_cos(&cos_old, cos(rad), fr_cos_bam(bam), label);
+ sw_tan(&tan_old, tan(rad), fr_tan_bam(bam), label);
+ sw_tan(&tan_new, tan(rad), fr_tan_bam32(bam), label);
+ }
+
+ /* timing */
+ {
+ volatile s32 sink = 0;
+ double t0, t1;
+ long N = 65536;
+
+ t0 = now_ns();
+ for (long b = 0; b < N; b++) sink += fr_cos_bam((u16)b);
+ t1 = now_ns();
+ cos_old.total_ns = t1 - t0; cos_old.time_count = N;
+
+ t0 = now_ns();
+ for (long b = 0; b < N; b++) sink += fr_tan_bam((u16)b);
+ t1 = now_ns();
+ tan_old.total_ns = t1 - t0; tan_old.time_count = N;
+
+ t0 = now_ns();
+ for (long b = 0; b < N; b++) sink += fr_tan_bam32((u16)b);
+ t1 = now_ns();
+ tan_new.total_ns = t1 - t0; tan_new.time_count = N;
+
+ (void)sink;
+ }
+
+ printf("### BAM domain — all 65536 u16 values\n\n");
+ sw_header();
+ sw_print(&cos_old);
+ sw_print(&tan_old);
+ sw_print(&tan_new);
+ printf("\ntan samples: old=%ld, new=%ld (rest skipped near poles)\n\n",
+ tan_old.count, tan_new.count);
+}
+
+/* ════════════════════════════════════════════════════════
+ * Radian sweep: every s15.16 LSB from -2pi to +2pi
+ * ════════════════════════════════════════════════════════ */
+static void sweep_rad(void)
+{
+    sweep_t cos_old, tan_old, tan_new;
+    sw_init(&cos_old, "fr_cos (s15.16)");
+    sw_init(&tan_old, "fr_tan (s15.16)");
+    sw_init(&tan_new, "fr_tan32 (s15.16)");
+
+    s32 two_pi = (s32)(2.0 * M_PI * 65536.0 + 0.5); /* 2*pi in s15.16, rounded: 411775 */
+    long total = 0;
+
+    for (s32 r = -two_pi; r <= two_pi; r++) {       /* accuracy: every LSB in [-2pi, +2pi] */
+        double rad = (double)r / 65536.0;
+        char label[64];
+        snprintf(label, sizeof(label), "r16=%d (%.4f rad)", (int)r, rad); /* cast: s32 may not be int */
+
+        sw_cos(&cos_old, cos(rad), fr_cos(r, 16), label);
+        sw_tan(&tan_old, tan(rad), fr_tan(r, 16), label);
+        sw_tan(&tan_new, tan(rad), fr_tan32(r, 16), label);
+        total++;
+    }
+
+    /* timing: strided pass over the same range; ns/call divides by the real trip count */
+    {
+        volatile s32 sink = 0;                      /* keeps the calls from being optimized away */
+        double t0, t1;
+        s32 step = (2 * two_pi) / 65536;            /* aim for roughly 65536 timed calls */
+        if (step < 1) step = 1;
+        long calls = (2L * two_pi) / step + 1;      /* exact iteration count of each loop below */
+
+        t0 = now_ns();
+        for (s32 r = -two_pi; r <= two_pi; r += step) sink += fr_cos(r, 16);
+        t1 = now_ns();
+        cos_old.total_ns = t1 - t0; cos_old.time_count = calls;
+
+        t0 = now_ns();
+        for (s32 r = -two_pi; r <= two_pi; r += step) sink += fr_tan(r, 16);
+        t1 = now_ns();
+        tan_old.total_ns = t1 - t0; tan_old.time_count = calls;
+
+        t0 = now_ns();
+        for (s32 r = -two_pi; r <= two_pi; r += step) sink += fr_tan32(r, 16);
+        t1 = now_ns();
+        tan_new.total_ns = t1 - t0; tan_new.time_count = calls;
+
+        (void)sink;
+    }
+
+    printf("### Radian domain — every s15.16 LSB, -2pi..+2pi (%ld values)\n\n", total);
+    sw_header();
+    sw_print(&cos_old);
+    sw_print(&tan_old);
+    sw_print(&tan_new);
+    printf("\ntan samples: old=%ld, new=%ld\n\n", tan_old.count, tan_new.count);
+}
+
+/* ════════════════════════════════════════════════════════
+ * Degree sweep
+ *   fr_tan_deg32(deg,16) — new, s32 input: s15.16 degrees from -360 to +360
+ *                          in steps of 64 LSB (1/1024 deg, ~737k values)
+ *   FR_TanI(deg)         — integer degrees (integer-aligned subset of the above)
+ *   FR_Cos/FR_Tan(deg,6) — old, s16 input: all 65536 s16 values at radix 6 (s9.6)
+ *
+ * NOTE: FR_Tan still takes s16, so at radix 16 it could only cover ±0.5 deg.
+ * For a fair comparison the old path is swept at radix=6 (s9.6, ±512 deg).
+ * ════════════════════════════════════════════════════════ */
+static void sweep_deg(void)
+{
+ sweep_t cos_old, tan_old_s16, tan_new_full, tan_old_int;
+ sw_init(&cos_old, "FR_Cos (s9.6 deg)");
+ sw_init(&tan_old_s16, "FR_Tan (s9.6 deg, s16)");
+ sw_init(&tan_new_full, "fr_tan_deg32 (s15.16 deg)");
+ sw_init(&tan_old_int, "FR_TanI (int deg)");
+
+ /* New path: s15.16 degrees, every LSB from -360 to +360.
+ * 360 * 65536 = 23592960. Total ~47M values — too many.
+ * Use same density as radian sweep: ~823k values.
+ * -360..+360 deg = 720 deg range. 823551 / 720 ≈ 1144 steps/deg.
+ * That's close to radix=10 (1024 steps/deg). Use radix=16 with
+ * step = 65536/1024 = 64 to get ~720k values. */
+ s32 deg360_s16 = 360L * 65536;
+ s32 step_new = 64; /* every 64th LSB of s15.16 = 1/1024 deg */
+ long total_new = 0;
+
+ for (s32 d = -deg360_s16; d <= deg360_s16; d += step_new) {
+ double deg_dbl = (double)d / 65536.0;
+ double rad = deg_dbl * M_PI / 180.0;
+ char label[64];
+ snprintf(label, sizeof(label), "d16=%d (%.4f deg)", (int)d, deg_dbl);
+
+ double rt = tan(rad);
+ sw_tan(&tan_new_full, rt, fr_tan_deg32(d, 16), label);
+
+ /* FR_TanI at integer-degree subset */
+ if (d % 65536 == 0) {
+ s16 ideg = (s16)(d / 65536);
+ char ilabel[64];
+ snprintf(ilabel, sizeof(ilabel), "deg=%d", ideg);
+ sw_tan(&tan_old_int, rt, FR_TanI(ideg), ilabel);
+ }
+
+ total_new++;
+ }
+
+ /* Old path: FR_Tan takes s16, so use radix=6 (s9.6) to cover ±512 deg */
+ long total_old = 0;
+ for (long d = -32768; d <= 32767; d++) {
+ s16 dval = (s16)d;
+ double deg_dbl = (double)d / 64.0;
+ double rad = deg_dbl * M_PI / 180.0;
+ char label[64];
+ snprintf(label, sizeof(label), "d6=%d (%.3f deg)", (int)d, deg_dbl);
+
+ sw_cos(&cos_old, cos(rad), FR_Cos(dval, 6), label);
+ sw_tan(&tan_old_s16, tan(rad), FR_Tan(dval, 6), label);
+ total_old++;
+ }
+
+ /* timing */
+ {
+ volatile s32 sink = 0;
+ double t0, t1;
+ long N = 65536;
+
+ t0 = now_ns();
+ for (long d = -32768; d <= 32767; d++) sink += FR_Cos((s16)d, 6);
+ t1 = now_ns();
+ cos_old.total_ns = t1 - t0; cos_old.time_count = N;
+
+ t0 = now_ns();
+ for (long d = -32768; d <= 32767; d++) sink += FR_Tan((s16)d, 6);
+ t1 = now_ns();
+ tan_old_s16.total_ns = t1 - t0; tan_old_s16.time_count = N;
+
+ s32 tstep = (2 * deg360_s16) / N;
+ t0 = now_ns();
+ for (s32 d = -deg360_s16; d <= deg360_s16; d += tstep)
+ sink += fr_tan_deg32(d, 16);
+ t1 = now_ns();
+ tan_new_full.total_ns = t1 - t0; tan_new_full.time_count = N;
+
+ t0 = now_ns();
+ for (long i = -360; i < 360; i++) sink += FR_TanI((s16)i);
+ t1 = now_ns();
+ tan_old_int.total_ns = t1 - t0; tan_old_int.time_count = 720;
+
+ (void)sink;
+ }
+
+ printf("### Degree domain\n\n");
+ printf("fr_tan_deg32: s32 input, radix=16, every 1/1024 deg, ±360 deg (%ld values)\n", total_new);
+ printf("FR_Tan: s16 input, radix=6 (s9.6), all 65536 s16 values (%ld values)\n\n", total_old);
+ sw_header();
+ sw_print(&cos_old);
+ sw_print(&tan_old_s16);
+ sw_print(&tan_new_full);
+ sw_print(&tan_old_int);
+ printf("\ntan samples: old_s16=%ld, new_s32=%ld, old_int=%ld\n\n",
+ tan_old_s16.count, tan_new_full.count, tan_old_int.count);
+}
+
+/* ── main ──────────────────────────────────────────── */
+
+int main(void)
+{
+ printf("FR_Math exhaustive error sweep: cos, tan (old), tan32 (new)\n");
+ printf("============================================================\n");
+ printf("cos: error = %% of full scale (1.0)\n");
+ printf("tan: relative %% when |ref|>=0.01, absolute when near zero, skip |ref|>1000\n\n");
+
+ sweep_bam();
+ sweep_rad();
+ sweep_deg();
+
+ printf("Done.\n");
+ return 0;
+}
diff --git a/tests/test_pole_table.c b/tests/test_pole_table.c
new file mode 100644
index 0000000..02a2829
--- /dev/null
+++ b/tests/test_pole_table.c
@@ -0,0 +1,92 @@
+/**
+ * test_pole_table.c — dump values around both tan poles (90° and 270°)
+ *
+ * For ±20 entries around each pole, show:
+ * BAM index, degrees, ground truth, and each function's output + error
+ */
+
+#include <stdio.h>
+#include <math.h>
+#include "FR_math.h"
+#include "FR_trig_table.h"
+#include "FR_tan_table.h"
+
+extern s32 fr_tan_bam32(u16 bam);
+extern s32 fr_tan32(s32 rad, u16 radix);
+extern s32 fr_tan_deg32(s32 deg, u16 radix);
+
+static double to_dbl(s32 v) { return (double)v / 65536.0; }
+
+static double err_pct(double ref, double got)
+{
+    const double ref_mag = fabs(ref);
+    double err = fabs(got - ref);
+    if (ref_mag >= 0.01) err /= ref_mag;   /* relative error; stays absolute when ref is near zero */
+    return err * 100.0;
+}
+
+static void dump_pole(u16 pole_bam, const char *name, int range)
+{
+    printf("\n### Pole at %s (BAM %u)\n\n", name, pole_bam);
+    printf("| %5s | %9s | %14s | %14s %7s | %14s %7s | %14s %7s | %14s %7s |\n",
+           "BAM", "deg", "ground truth",
+           "tan_bam OLD", "err%",
+           "tan_bam32 NEW", "err%",
+           "tan(rad) NEW", "err%",
+           "tan(deg) NEW", "err%");
+    printf("| %5s | %9s | %14s | %14s %7s | %14s %7s | %14s %7s | %14s %7s |\n",
+           "-----", "---------", "--------------",
+           "--------------", "-------",
+           "--------------", "-------",
+           "--------------", "-------",
+           "--------------", "-------");
+
+    for (int i = -range; i <= range; i++) {
+        u16 bam = (u16)((int)pole_bam + i);
+        double rad_dbl = (double)bam * 2.0 * M_PI / 65536.0;
+        double deg_dbl = (double)bam * 360.0 / 65536.0;
+        double truth = tan(rad_dbl);   /* libm reference for this BAM angle */
+
+        /* BAM functions: fed the BAM index directly, no input conversion */
+        double v_bam_old = to_dbl(fr_tan_bam(bam));
+        double v_bam_new = to_dbl(fr_tan_bam32(bam));
+
+        /* Radian: the same angle as round-to-nearest s15.16 radians */
+        s32 r16 = (s32)(rad_dbl * 65536.0 + (rad_dbl >= 0 ? 0.5 : -0.5));
+        double v_rad_new = to_dbl(fr_tan32(r16, 16));
+
+        /* Degree: s15.16 (an s9.6 step of 1/64 deg spans ~3 BAM steps, too coarse here) */
+        s32 d16 = (s32)(deg_dbl * 65536.0 + (deg_dbl >= 0 ? 0.5 : -0.5));
+        double v_deg_new = to_dbl(fr_tan_deg32(d16, 16));
+
+        /* Clip display for readability: the pole row itself has no meaningful error */
+        if (fabs(truth) > 100000.0) {
+            printf("| %5u | %9.3f | %14s | %14s %7s | %14s %7s | %14s %7s | %14s %7s |\n",
+                   bam, deg_dbl, ">>pole<<",
+                   "---", "---", "---", "---", "---", "---", "---", "---");
+            continue;
+        }
+
+        printf("| %5u | %9.3f | %14.4f | %14.4f %6.2f%% | %14.4f %6.2f%% | %14.4f %6.2f%% | %14.4f %6.2f%% |\n",
+               bam, deg_dbl, truth,
+               v_bam_old, err_pct(truth, v_bam_old),
+               v_bam_new, err_pct(truth, v_bam_new),
+               v_rad_new, err_pct(truth, v_rad_new),
+               v_deg_new, err_pct(truth, v_deg_new));
+    }
+}
+
+int main(void)
+{
+ printf("FR_Math tan pole neighborhood dump\n");
+ printf("==================================\n");
+ printf("Values within ±20 BAM steps of each pole.\n");
+ printf("Error: relative %% when |ref|>=0.01, absolute otherwise.\n");
+
+ /* 90° pole = BAM 16384, 270° pole = BAM 49152 */
+ dump_pole(16384, "90 deg", 20);
+ dump_pole(49152, "270 deg", 20);
+
+ printf("\nDone.\n");
+ return 0;
+}
diff --git a/tests/test_sweep_csv.c b/tests/test_sweep_csv.c
new file mode 100644
index 0000000..5b33cbc
--- /dev/null
+++ b/tests/test_sweep_csv.c
@@ -0,0 +1,149 @@
+/**
+ * test_sweep_csv.c — emit CSV + summary for all 65536 BAM values
+ *
+ * Compares 3 tan implementations:
+ * fr_tan_bam (old): 65-entry u16 octant table + reciprocal
+ * fr_tan_bam32_d64: sin/cos from 129-entry cos table, s64 div
+ * fr_tan_bam32 (new): direct 65-entry u32 tan table lookup, no div
+ *
+ * Ground truth clamped to ±SAT_MAX for fair pole comparison.
+ *
+ * Output: build/tan_sweep.csv
+ */
+
+#include <stdio.h>
+#include <math.h>
+#include <time.h>
+#include "FR_math.h"
+#include "FR_trig_table.h"
+#include "FR_tan_table.h"
+
+extern s32 fr_tan_bam32(u16 bam);
+extern s32 fr_tan_bam32_d64(u16 bam);
+
+#define SAT_MAX (32767.999984741211)
+
+static double to_dbl(s32 v) { return (double)v / 65536.0; }
+
+static double clamp(double v)
+{
+    /* saturate to ±SAT_MAX so the reference can be compared against saturated outputs */
+    const double lim = SAT_MAX;
+    return (v > lim) ? lim : ((v < -lim) ? -lim : v);
+}
+
+static double err_pct(double ref, double got)
+{
+    const double ref_mag = fabs(ref);
+    double err;
+    /* both magnitudes saturated (sign is not compared): report zero error */
+    if (ref_mag >= SAT_MAX && fabs(got) >= SAT_MAX) return 0.0;
+    err = fabs(got - ref);
+    return ((ref_mag >= 0.01) ? err / ref_mag : err) * 100.0;
+}
+
+static double now_ns(void)
+{
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return (double)ts.tv_sec * 1e9 + (double)ts.tv_nsec;
+}
+
+int main(void)
+{
+ FILE *fp = fopen("build/tan_sweep.csv", "w");
+ if (!fp) { perror("fopen"); return 1; }
+
+ fprintf(fp, "bam,degrees,tan_truth,"
+ "tan_old,tan_d64,tan_direct,"
+ "err_old,err_d64,err_direct\n");
+
+ for (long b = 0; b < 65536; b++) {
+ u16 bam = (u16)b;
+ double deg = (double)bam * 360.0 / 65536.0;
+ double rad = (double)bam * 2.0 * M_PI / 65536.0;
+ double truth = clamp(tan(rad));
+
+ double v_old = to_dbl(fr_tan_bam(bam));
+ double v_d64 = to_dbl(fr_tan_bam32_d64(bam));
+ double v_direct = to_dbl(fr_tan_bam32(bam));
+
+ fprintf(fp, "%u,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f\n",
+ bam, deg, truth,
+ v_old, v_d64, v_direct,
+ err_pct(truth, v_old),
+ err_pct(truth, v_d64),
+ err_pct(truth, v_direct));
+ }
+
+ fclose(fp);
+
+ /* Timing */
+ volatile s32 sink = 0;
+ double t0, t1;
+ long N = 65536;
+
+ t0 = now_ns();
+ for (long b = 0; b < N; b++) sink += fr_tan_bam((u16)b);
+ t1 = now_ns();
+ double ns_old = (t1 - t0) / N;
+
+ t0 = now_ns();
+ for (long b = 0; b < N; b++) sink += fr_tan_bam32_d64((u16)b);
+ t1 = now_ns();
+ double ns_d64 = (t1 - t0) / N;
+
+ t0 = now_ns();
+ for (long b = 0; b < N; b++) sink += fr_tan_bam32((u16)b);
+ t1 = now_ns();
+ double ns_direct = (t1 - t0) / N;
+
+ (void)sink;
+
+ /* Stats */
+ printf("Wrote build/tan_sweep.csv (65536 rows)\n\n");
+
+ double peak_old = 0, peak_d64 = 0, peak_dir = 0;
+ double sum_old = 0, sum_d64 = 0, sum_dir = 0;
+ int peak_bam_old = 0, peak_bam_d64 = 0, peak_bam_dir = 0;
+
+ for (long b = 0; b < 65536; b++) {
+ u16 bam = (u16)b;
+ double rad = (double)bam * 2.0 * M_PI / 65536.0;
+ double truth = clamp(tan(rad));
+
+ double e_old = err_pct(truth, to_dbl(fr_tan_bam(bam)));
+ double e_d64 = err_pct(truth, to_dbl(fr_tan_bam32_d64(bam)));
+ double e_dir = err_pct(truth, to_dbl(fr_tan_bam32(bam)));
+
+ sum_old += e_old; sum_d64 += e_d64; sum_dir += e_dir;
+ if (e_old > peak_old) { peak_old = e_old; peak_bam_old = bam; }
+ if (e_d64 > peak_d64) { peak_d64 = e_d64; peak_bam_d64 = bam; }
+ if (e_dir > peak_dir) { peak_dir = e_dir; peak_bam_dir = bam; }
+ }
+
+ printf("| %-24s | %5s | %10s | %10s | %7s | %-24s |\n",
+ "Implementation", "Table", "Peak Err", "Avg Err", "ns/call", "Peak At");
+ printf("| %-24s | %5s | %10s | %10s | %7s | %-24s |\n",
+ "------------------------", "-----", "----------", "----------", "-------",
+ "------------------------");
+ printf("| %-24s | %5s | %9.4f%% | %9.5f%% | %5.1f | BAM %5d (%6.2f deg) |\n",
+ "fr_tan_bam (old)", "65u16",
+ peak_old, sum_old / 65536, ns_old,
+ peak_bam_old, peak_bam_old * 360.0 / 65536.0);
+ printf("| %-24s | %5s | %9.4f%% | %9.5f%% | %5.1f | BAM %5d (%6.2f deg) |\n",
+ "fr_tan_bam32_d64 (s/c)", "none",
+ peak_d64, sum_d64 / 65536, ns_d64,
+ peak_bam_d64, peak_bam_d64 * 360.0 / 65536.0);
+ printf("| %-24s | %5s | %9.4f%% | %9.5f%% | %5.1f | BAM %5d (%6.2f deg) |\n",
+ "fr_tan_bam32 (direct)", "65u32",
+ peak_dir, sum_dir / 65536, ns_direct,
+ peak_bam_dir, peak_bam_dir * 360.0 / 65536.0);
+
+ printf("\nOld: 65-entry u16 octant table + reciprocal (div in 2nd octant).\n");
+ printf("d64: sin/cos via 129-entry cos table, always s64 div.\n");
+ printf("Direct: 65-entry u32 quadrant tan table, lerp with shift, NO div.\n");
+
+ printf("\nDone.\n");
+ return 0;
+}
diff --git a/tests/test_tan32.c b/tests/test_tan32.c
new file mode 100644
index 0000000..ec13184
--- /dev/null
+++ b/tests/test_tan32.c
@@ -0,0 +1,424 @@
+/*
+ * test_tan32.c - Head-to-head comparison of LUT32 tan/atan2 vs current impls
+ *
+ * Compares:
+ * fr_tan_bam32() vs fr_tan_bam() — BAM accuracy + speed
+ * fr_tan32() vs fr_tan() — radian accuracy
+ * fr_tan_deg32() vs FR_TanI() — integer-degree accuracy
+ * fr_atan2_32() vs FR_atan2() — accuracy + speed
+ *
+ * Compile:
+ * cc -Isrc -Wall -Os src/FR_tan32.c src/FR_math.c tests/test_tan32.c -lm -o build/test_tan32
+ *
+ * @author M A Chatterjee
+ */
+
+#include
+#include
+#include
+#include
+#include "../src/FR_math.h"
+
+/* Declarations for the new LUT32 functions (in FR_tan32.c) */
+extern s32 fr_tan_bam32(u16 bam);
+extern s32 fr_tan32(s32 rad, u16 radix);
+extern s32 fr_tan_deg32(s32 deg, u16 radix);
+extern s32 fr_atan2_32(s32 y, s32 x, u16 out_radix);
+
+/*=======================================================
+ * Helpers
+ */
+static double fr2d(s32 val, int radix) {
+    return ldexp((double)val, -radix);   /* val / 2^radix: fixed-point to double */
+}
+
+/*=======================================================
+ * Test 1: Tangent accuracy sweep — all 65536 BAM points
+ */
+static void test_tan_bam_accuracy(void)
+{
+    double max_err_old = 0.0, max_err_new = 0.0;
+    double sum_err_old = 0.0, sum_err_new = 0.0;
+    u16 max_bam_old = 0, max_bam_new = 0;
+    int count = 0;
+    long bam;   /* wider than u16 so the loop can include 0xFFFF and still terminate */
+
+    printf("## Tangent BAM Accuracy (65536 BAM points)\n\n");
+
+    for (bam = 0; bam <= 0xFFFFL; bam++) {
+        double angle = (double)bam * 2.0 * M_PI / 65536.0;
+        double ref = tan(angle);
+        double old_val, new_val, err_old, err_new;
+
+        /* Skip near poles where tan -> infinity (|tan| > 500, within ~0.11 deg of 90/270) */
+        if (fabs(ref) > 500.0) continue;
+
+        old_val = fr2d(fr_tan_bam((u16)bam), 16);
+        new_val = fr2d(fr_tan_bam32((u16)bam), 16);
+
+        /* Percentage error relative to reference */
+        if (fabs(ref) > 0.001) {
+            err_old = fabs((old_val - ref) / ref) * 100.0;
+            err_new = fabs((new_val - ref) / ref) * 100.0;
+        } else {
+            /* Near zero, use absolute error scaled to % of 1.0 */
+            err_old = fabs(old_val - ref) * 100.0;
+            err_new = fabs(new_val - ref) * 100.0;
+        }
+
+        sum_err_old += err_old;
+        sum_err_new += err_new;
+        if (err_old > max_err_old) { max_err_old = err_old; max_bam_old = (u16)bam; }
+        if (err_new > max_err_new) { max_err_new = err_new; max_bam_new = (u16)bam; }
+        count++;
+    }
+
+    printf("| Metric | Current (fr_tan_bam) | LUT32 (fr_tan_bam32) |\n");
+    printf("|----------------|----------------------|----------------------|\n");
+    printf("| Peak error (%%) | %11.6f | %11.6f |\n", max_err_old, max_err_new);
+    printf("| Avg error (%%) | %11.6f | %11.6f |\n", sum_err_old / count, sum_err_new / count);
+    printf("| Peak BAM | 0x%04X | 0x%04X |\n", max_bam_old, max_bam_new);
+    printf("| Points tested | %6d | %6d |\n", count, count);
+    printf("\n");
+}
+
+/*=======================================================
+ * Test 2: Tangent radian accuracy — sweep at radix 16
+ */
+static void test_tan_radian_accuracy(void)
+{
+ double max_err_old = 0.0, max_err_new = 0.0;
+ double sum_err_old = 0.0, sum_err_new = 0.0;
+ int count = 0;
+ int i;
+
+ printf("## Tangent Radian Accuracy (10000 points, radix 16)\n\n");
+
+ /* Sweep radians from -pi to pi in 10000 steps */
+ for (i = 0; i < 10000; i++) {
+ double angle = -M_PI + 2.0 * M_PI * (double)i / 10000.0;
+ double ref = tan(angle);
+ s32 rad16 = (s32)(angle * 65536.0);
+ double old_val, new_val, err_old, err_new;
+
+ if (fabs(ref) > 500.0) continue;
+
+ old_val = fr2d(fr_tan(rad16, 16), 16);
+ new_val = fr2d(fr_tan32(rad16, 16), 16);
+
+ if (fabs(ref) > 0.001) {
+ err_old = fabs((old_val - ref) / ref) * 100.0;
+ err_new = fabs((new_val - ref) / ref) * 100.0;
+ } else {
+ err_old = fabs(old_val - ref) * 100.0;
+ err_new = fabs(new_val - ref) * 100.0;
+ }
+
+ sum_err_old += err_old;
+ sum_err_new += err_new;
+ if (err_old > max_err_old) max_err_old = err_old;
+ if (err_new > max_err_new) max_err_new = err_new;
+ count++;
+ }
+
+ printf("| Metric | Current (fr_tan) | LUT32 (fr_tan32) |\n");
+ printf("|----------------|----------------------|----------------------|\n");
+ printf("| Peak error (%%) | %11.6f | %11.6f |\n", max_err_old, max_err_new);
+ printf("| Avg error (%%) | %11.6f | %11.6f |\n", sum_err_old / count, sum_err_new / count);
+ printf("| Points tested | %6d | %6d |\n", count, count);
+ printf("\n");
+}
+
+/*=======================================================
+ * Test 3: Tangent integer-degree accuracy — 0..359 degrees
+ */
+static void test_tan_degree_accuracy(void)
+{
+ double max_err_old = 0.0, max_err_new = 0.0;
+ double sum_err_old = 0.0, sum_err_new = 0.0;
+ int count = 0;
+ int deg;
+
+ printf("## Tangent Integer-Degree Accuracy (360 degrees)\n\n");
+
+ for (deg = 0; deg < 360; deg++) {
+ double angle = (double)deg * M_PI / 180.0;
+ double ref = tan(angle);
+ double old_val, new_val, err_old, err_new;
+
+ if (fabs(ref) > 500.0) continue;
+
+ old_val = fr2d(FR_TanI((s16)deg), 16);
+ new_val = fr2d(fr_tan_deg32((s16)deg, 0), 16);
+
+ if (fabs(ref) > 0.001) {
+ err_old = fabs((old_val - ref) / ref) * 100.0;
+ err_new = fabs((new_val - ref) / ref) * 100.0;
+ } else {
+ err_old = fabs(old_val - ref) * 100.0;
+ err_new = fabs(new_val - ref) * 100.0;
+ }
+
+ sum_err_old += err_old;
+ sum_err_new += err_new;
+ if (err_old > max_err_old) max_err_old = err_old;
+ if (err_new > max_err_new) max_err_new = err_new;
+ count++;
+ }
+
+ printf("| Metric | Current (FR_TanI) | LUT32 (fr_tan_deg32) |\n");
+ printf("|----------------|----------------------|----------------------|\n");
+ printf("| Peak error (%%) | %11.6f | %11.6f |\n", max_err_old, max_err_new);
+ printf("| Avg error (%%) | %11.6f | %11.6f |\n", sum_err_old / count, sum_err_new / count);
+ printf("| Points tested | %6d | %6d |\n", count, count);
+ printf("\n");
+}
+
+/*=======================================================
+ * Test 4: Tangent speed comparison (BAM)
+ */
+static void test_tan_speed(void)
+{
+ volatile s32 sink = 0;
+ clock_t start, end;
+ double old_ns, new_ns;
+ int iters = 1000000;
+ int i;
+
+ printf("## Tangent Speed (%d iterations)\n\n", iters);
+
+ /* Warm up */
+ for (i = 0; i < 1000; i++) sink += fr_tan_bam((u16)i);
+
+ start = clock();
+ for (i = 0; i < iters; i++)
+ sink += fr_tan_bam((u16)(i & 0xFFFF));
+ end = clock();
+ old_ns = (double)(end - start) / CLOCKS_PER_SEC * 1e9 / iters;
+
+ start = clock();
+ for (i = 0; i < iters; i++)
+ sink += fr_tan_bam32((u16)(i & 0xFFFF));
+ end = clock();
+ new_ns = (double)(end - start) / CLOCKS_PER_SEC * 1e9 / iters;
+
+ printf("| Metric | Current (fr_tan_bam) | LUT32 (fr_tan_bam32) |\n");
+ printf("|----------------|----------------------|----------------------|\n");
+ printf("| ns/call | %11.1f | %11.1f |\n", old_ns, new_ns);
+ printf("\n");
+
+ (void)sink;
+}
+
+/*=======================================================
+ * Test 5: atan2 accuracy sweep — angles at multiple radii (FR_atan2 vs fr_atan2_32 against libm atan2, error reported as % of pi)
+ */
+static void test_atan2_accuracy(void)
+{
+ double max_err_old = 0.0, max_err_new = 0.0; /* peak error per implementation, % of pi */
+ double sum_err_old = 0.0, sum_err_new = 0.0; /* running totals for the averages */
+ int count = 0; /* points actually compared (degenerate points are skipped) */
+ int ri, ai;
+ static const double radii[] = { 0.1, 1.0, 10.0, 100.0, 1000.0 };
+
+ printf("## atan2 Accuracy Sweep (5 radii x 65536 angles)\n\n");
+
+ for (ri = 0; ri < 5; ri++) {
+ double r = radii[ri];
+ for (ai = 0; ai < 65536; ai++) {
+ double angle = (double)ai * 2.0 * M_PI / 65536.0 - M_PI; /* -pi up to just under +pi */
+ double fx = r * cos(angle);
+ double fy = r * sin(angle);
+ s32 x = (s32)(fx * 65536.0); /* scaled by 2^16; the cast truncates toward zero */
+ s32 y = (s32)(fy * 65536.0);
+ double ref = atan2(fy, fx); /* reference is computed from the unquantized point */
+ double old_val, new_val, err_old, err_new;
+
+ /* Skip degenerate */
+ if (x == 0 && y == 0) continue;
+
+ old_val = fr2d(FR_atan2(y, x, 16), 16);
+ new_val = fr2d(fr_atan2_32(y, x, 16), 16);
+
+ /* Absolute error in radians, wrapped to [-pi, pi] */
+ err_old = fabs(old_val - ref);
+ err_new = fabs(new_val - ref);
+ /* Handle wraparound near +/-pi: difference > pi means we
+ * crossed the branch cut; true angular error is 2*pi - diff */
+ if (err_old > M_PI) err_old = 2.0 * M_PI - err_old;
+ if (err_new > M_PI) err_new = 2.0 * M_PI - err_new;
+ /* Convert to % of pi for reporting */
+ err_old = err_old / M_PI * 100.0;
+ err_new = err_new / M_PI * 100.0;
+
+ sum_err_old += err_old;
+ sum_err_new += err_new;
+ if (err_old > max_err_old) max_err_old = err_old;
+ if (err_new > max_err_new) max_err_new = err_new;
+ count++;
+ }
+ }
+
+ printf("| Metric | Current (FR_atan2) | LUT32 (fr_atan2_32) |\n");
+ printf("|---------------------|----------------------|----------------------|\n");
+ printf("| Peak error (%% of pi)| %11.6f | %11.6f |\n", max_err_old, max_err_new);
+ printf("| Avg error (%% of pi) | %11.6f | %11.6f |\n", sum_err_old / count, sum_err_new / count); /* sum / count: mean over compared points */
+ printf("| Points tested | %6d | %6d |\n", count, count);
+ printf("\n");
+}
+
+/*=======================================================
+ * Test 6: atan2 speed comparison - times FR_atan2 vs fr_atan2_32 over 256 precomputed points and prints ns/call
+ */
+static void test_atan2_speed(void)
+{
+ volatile s32 sink = 0; /* volatile accumulator that consumes every call's result */
+ clock_t start, end;
+ double old_ns, new_ns; /* average nanoseconds per call for each implementation */
+ int iters = 500000;
+ int i;
+
+ printf("## atan2 Speed (%d iterations)\n\n", iters);
+
+ /* Pre-compute some x,y pairs */
+ s32 xs[256], ys[256]; /* declared after a statement, so this needs C99 or later */
+ for (i = 0; i < 256; i++) {
+ double angle = (double)i * 2.0 * M_PI / 256.0;
+ xs[i] = (s32)(10.0 * cos(angle) * 65536.0); /* radius 10, scaled by 2^16 */
+ ys[i] = (s32)(10.0 * sin(angle) * 65536.0);
+ }
+
+ /* Warm up (NOTE(review): only FR_atan2 is exercised here, fr_atan2_32 is not) */
+ for (i = 0; i < 256; i++) sink += FR_atan2(ys[i], xs[i], 16);
+
+ start = clock();
+ for (i = 0; i < iters; i++)
+ sink += FR_atan2(ys[i & 0xFF], xs[i & 0xFF], 16); /* i & 0xFF cycles through the 256 points */
+ end = clock();
+ old_ns = (double)(end - start) / CLOCKS_PER_SEC * 1e9 / iters; /* clock ticks -> seconds -> ns, averaged over iters */
+
+ start = clock();
+ for (i = 0; i < iters; i++)
+ sink += fr_atan2_32(ys[i & 0xFF], xs[i & 0xFF], 16);
+ end = clock();
+ new_ns = (double)(end - start) / CLOCKS_PER_SEC * 1e9 / iters;
+
+ printf("| Metric | Current (FR_atan2) | LUT32 (fr_atan2_32) |\n");
+ printf("|----------------|----------------------|----------------------|\n");
+ printf("| ns/call | %11.1f | %11.1f |\n", old_ns, new_ns);
+ printf("\n");
+
+ (void)sink; /* explicit no-op use of the accumulator */
+}
+
+/*=======================================================
+ * Test 7: Quick spot checks for correctness - returns the number of failed checks (0 = all passed); each failure prints one FAIL line
+ */
+static int test_spot_checks(void)
+{
+ int fails = 0; /* incremented once per failed check */
+ s32 v; /* result under test; the checks below treat 65536 as 1.0 */
+
+ printf("## Spot Checks\n\n");
+
+ /* tan(0) = 0 */
+ v = fr_tan_bam32(0);
+ if (v != 0) { printf(" FAIL: tan_bam32(0) = %d, expected 0\n", v); fails++; }
+
+ /* tan(45 deg) = 1.0 = 65536 in s15.16 */
+ v = fr_tan_bam32(0x2000); /* 45 deg = 8192 BAM */
+ if (abs(v - 65536) > 2) { printf(" FAIL: tan_bam32(45deg) = %d, expected ~65536\n", v); fails++; } /* tolerance: 2 LSB */
+
+ /* tan(180 deg) = 0 */
+ v = fr_tan_bam32(0x8000);
+ if (v != 0) { printf(" FAIL: tan_bam32(180deg) = %d, expected 0\n", v); fails++; }
+
+ /* tan(90 deg) = pole */
+ v = fr_tan_bam32(0x4000);
+ if (v != FR_TRIG_MAXVAL) { printf(" FAIL: tan_bam32(90deg) = %d, expected %d\n", v, FR_TRIG_MAXVAL); fails++; } /* exact match required at the pole */
+
+ /* tan(270 deg) = -pole */
+ v = fr_tan_bam32(0xC000);
+ if (v != -FR_TRIG_MAXVAL) { printf(" FAIL: tan_bam32(270deg) = %d, expected %d\n", v, -FR_TRIG_MAXVAL); fails++; }
+
+ /* Radian wrapper: tan(pi/4) = 1.0 */
+ {
+ s32 pi_4 = (s32)(M_PI / 4.0 * 65536.0); /* pi/4 at radix 16, truncated */
+ v = fr_tan32(pi_4, 16);
+ if (abs(v - 65536) > 100) { printf(" FAIL: tan32(pi/4) = %d (%.6f), expected ~65536\n", v, fr2d(v, 16)); fails++; } /* looser tolerance: 100 LSB */
+ }
+
+ /* Degree wrapper: tan(45) = 1.0 */
+ v = fr_tan_deg32(45, 0);
+ if (abs(v - 65536) > 100) { printf(" FAIL: tan_deg32(45) = %d (%.6f), expected ~65536\n", v, fr2d(v, 16)); fails++; }
+
+ /* Degree wrapper: tan(0) = 0 */
+ v = fr_tan_deg32(0, 0);
+ if (v != 0) { printf(" FAIL: tan_deg32(0) = %d, expected 0\n", v); fails++; }
+
+ /* atan2(0, 1) = 0 */
+ v = fr_atan2_32(0, 65536, 16);
+ if (v != 0) { printf(" FAIL: atan2_32(0,1) = %d, expected 0\n", v); fails++; }
+
+ /* atan2(1, 0) = pi/2 */
+ {
+ s32 expected = FR_CHRDX(FR_kQ2RAD, FR_kPREC, 16); /* library constant re-expressed at radix 16 (presumably pi/2 - confirm in FR_math.h) */
+ v = fr_atan2_32(65536, 0, 16);
+ if (abs(v - expected) > 2) { printf(" FAIL: atan2_32(1,0) = %d, expected ~%d\n", v, expected); fails++; }
+ }
+
+ /* atan2(1, 1) = pi/4 */
+ {
+ double ref = M_PI / 4.0;
+ s32 expected = (s32)(ref * 65536.0);
+ v = fr_atan2_32(65536, 65536, 16);
+ if (abs(v - expected) > 200) { printf(" FAIL: atan2_32(1,1) = %d (%.6f), expected ~%d (%.6f)\n",
+ v, fr2d(v, 16), expected, ref); fails++; } /* tolerance: 200 LSB */
+ }
+
+ /* atan2(-1, -1) = -3*pi/4 */
+ {
+ double ref = -3.0 * M_PI / 4.0;
+ s32 expected = (s32)(ref * 65536.0);
+ v = fr_atan2_32(-65536, -65536, 16);
+ if (abs(v - expected) > 200) { printf(" FAIL: atan2_32(-1,-1) = %d (%.6f), expected ~%d (%.6f)\n",
+ v, fr2d(v, 16), expected, ref); fails++; }
+ }
+
+ if (fails == 0)
+ printf(" All spot checks PASSED\n");
+ else
+ printf(" %d spot check(s) FAILED\n", fails);
+ printf("\n");
+
+ return fails;
+}
+
+/*=======================================================
+ * Main - runs the spot checks, then the accuracy and speed reports; exit status is 1 if any spot check failed, else 0
+ */
+int main(void)
+{
+ int fails; /* spot-check failure count; the only input to the exit status */
+
+ printf("# FR_tan32 Head-to-Head Comparison Report\n\n");
+
+ fails = test_spot_checks();
+ test_tan_bam_accuracy(); /* the accuracy/speed reports below print tables only; they return nothing */
+ test_tan_radian_accuracy();
+ test_tan_degree_accuracy();
+ test_tan_speed();
+ test_atan2_accuracy();
+ test_atan2_speed();
+
+ printf("## Summary\n\n");
+ printf("Design notes:\n");
+ printf(" - tan: sin/cos from the existing 129-entry cosine table (258B, already in ROM)\n");
+ printf(" No extra tan table needed for the forward path. One s64 division per call.\n");
+ printf(" Current uses octant table (130B) + reciprocal division for [45,90] deg.\n\n");
+ printf(" - atan2: binary search on 129-entry u32 tan table (516B) + quadrant mapping\n");
+ printf(" Current uses hypot_fast8 -> asin/acos chain (more code, no extra table)\n\n");
+ printf(" - Tan table (516B) needed only for atan2. Could be omitted if atan2 not used.\n\n");
+
+ return fails ? 1 : 0;
+}
diff --git a/tests/test_tan32_peaks.c b/tests/test_tan32_peaks.c
new file mode 100644
index 0000000..32e0437
--- /dev/null
+++ b/tests/test_tan32_peaks.c
@@ -0,0 +1,198 @@
+/*
+ * test_tan32_peaks.c - Find peak error locations and print ±20 entries around them
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "../src/FR_math.h"
+
+extern s32 fr_tan_bam32(u16 bam); /* tangent of a BAM angle (this file maps bam to bam*2*pi/65536 rad); result read as radix 16 */
+extern s32 fr_tan32(s32 rad, u16 radix); /* tangent of a fixed-point radian angle; radix = fractional bits of rad */
+extern s32 fr_tan_deg32(s32 deg, u16 radix); /* tangent of a degree angle; called here with radix 0 (integer degrees) */
+
+static double fr2d(s32 val, int radix) { /* fixed-point -> double: val * 2^-radix */
+    return ldexp((double)val, -radix); /* exact power-of-two scaling, same value as val / 2^radix */
+}
+
+static double tan_err(double val, double ref) {
+    /* Percent error of val against ref: relative when |ref| > 0.001, otherwise absolute error * 100. */
+    const double miss = fabs(val - ref);
+    const double scale = (fabs(ref) > 0.001) ? fabs(ref) : 1.0;
+    return miss / scale * 100.0;
+}
+
+/*=======================================================
+ * BAM peak finder + neighborhood dump - finds the BAM input with the largest tan_err for fr_tan_bam (OLD) and fr_tan_bam32 (NEW), then prints +/-20 rows around each
+ */
+static void peak_tan_bam(void)
+{
+ s32 bam; /* s32, not u16: the loops below run to 65536 and below 0 */
+ s32 old_peak_bam = 0, new_peak_bam = 0; /* inputs where each implementation's error peaks */
+ double old_peak = 0, new_peak = 0; /* the peak error values, in percent */
+
+ /* Pass 1: find peaks */
+ for (bam = 0; bam < 65536; bam++) {
+ double angle = (double)bam * 2.0 * M_PI / 65536.0;
+ double ref = tan(angle);
+ if (fabs(ref) > 500.0) continue; /* ignore inputs where |tan| exceeds 500 (near the poles) */
+ double ov = fr2d(fr_tan_bam((u16)bam), 16);
+ double nv = fr2d(fr_tan_bam32((u16)bam), 16);
+ double oe = tan_err(ov, ref);
+ double ne = tan_err(nv, ref);
+ if (oe > old_peak) { old_peak = oe; old_peak_bam = bam; } /* strict >: the first input reaching a peak wins */
+ if (ne > new_peak) { new_peak = ne; new_peak_bam = bam; }
+ }
+
+ printf("## tan BAM: OLD peak at BAM %d (%.4f deg), NEW peak at BAM %d (%.4f deg)\n\n",
+ (int)old_peak_bam, old_peak_bam * 360.0 / 65536.0,
+ (int)new_peak_bam, new_peak_bam * 360.0 / 65536.0);
+
+ /* Pass 2: dump ±20 around OLD peak */
+ printf("### OLD peak neighborhood (BAM %d ± 20)\n\n", (int)old_peak_bam);
+ printf("| BAM | deg | ref (libm) | OLD result | OLD err %% | NEW result | NEW err %% |\n");
+ printf("|-------|-----------|----------------|----------------|-------------|----------------|-------------|\n");
+ for (bam = old_peak_bam - 20; bam <= old_peak_bam + 20; bam++) {
+ u16 b = (u16)(bam & 0xFFFF); /* wrap into 0..65535; the BAM column still prints the unwrapped bam */
+ double angle = (double)b * 2.0 * M_PI / 65536.0;
+ double ref = tan(angle);
+ if (fabs(ref) > 500.0) { printf("| %5d | %9.4f | (pole) | | | | |\n", bam, b * 360.0 / 65536.0); continue; }
+ double ov = fr2d(fr_tan_bam(b), 16);
+ double nv = fr2d(fr_tan_bam32(b), 16);
+ printf("| %5d | %9.4f | %14.8f | %14.8f | %11.6f | %14.8f | %11.6f |%s\n",
+ bam, b * 360.0 / 65536.0, ref, ov, tan_err(ov, ref), nv, tan_err(nv, ref),
+ (bam == old_peak_bam) ? " <-- OLD PEAK" : (bam == new_peak_bam) ? " <-- NEW PEAK" : "");
+ }
+
+ if (abs((int)(new_peak_bam - old_peak_bam)) > 25) { /* second table only when the peaks are more than 25 BAM apart */
+ printf("\n### NEW peak neighborhood (BAM %d ± 20)\n\n", (int)new_peak_bam);
+ printf("| BAM | deg | ref (libm) | OLD result | OLD err %% | NEW result | NEW err %% |\n");
+ printf("|-------|-----------|----------------|----------------|-------------|----------------|-------------|\n");
+ for (bam = new_peak_bam - 20; bam <= new_peak_bam + 20; bam++) {
+ u16 b = (u16)(bam & 0xFFFF);
+ double angle = (double)b * 2.0 * M_PI / 65536.0;
+ double ref = tan(angle);
+ if (fabs(ref) > 500.0) { printf("| %5d | %9.4f | (pole) | | | | |\n", bam, b * 360.0 / 65536.0); continue; }
+ double ov = fr2d(fr_tan_bam(b), 16);
+ double nv = fr2d(fr_tan_bam32(b), 16);
+ printf("| %5d | %9.4f | %14.8f | %14.8f | %11.6f | %14.8f | %11.6f |%s\n",
+ bam, b * 360.0 / 65536.0, ref, ov, tan_err(ov, ref), nv, tan_err(nv, ref),
+ (bam == new_peak_bam) ? " <-- NEW PEAK" : "");
+ }
+ }
+
+ printf("\n");
+}
+
+/*=======================================================
+ * Radian peak finder + neighborhood dump - same as the BAM version but for fr_tan (OLD) vs fr_tan32 (NEW) over radix-16 inputs -65536..65535
+ */
+static void peak_tan_rad(void)
+{
+ s32 rad16; /* radian input at radix 16: rad16 / 65536.0 is the angle in radians */
+ s32 old_peak_r = 0, new_peak_r = 0; /* inputs where each implementation's error peaks */
+ double old_peak = 0, new_peak = 0; /* the peak error values, in percent */
+
+ for (rad16 = -65536; rad16 <= 65535; rad16++) {
+ double angle = (double)rad16 / 65536.0;
+ double ref = tan(angle);
+ if (fabs(ref) > 500.0) continue;
+ double ov = fr2d(fr_tan(rad16, 16), 16);
+ double nv = fr2d(fr_tan32(rad16, 16), 16);
+ double oe = tan_err(ov, ref);
+ double ne = tan_err(nv, ref);
+ if (oe > old_peak) { old_peak = oe; old_peak_r = rad16; } /* strict >: the first input reaching a peak wins */
+ if (ne > new_peak) { new_peak = ne; new_peak_r = rad16; }
+ }
+
+ printf("## tan Radian: OLD peak at r16=%d (%.6f rad, %.4f deg), NEW peak at r16=%d (%.6f rad, %.4f deg)\n\n",
+ (int)old_peak_r, old_peak_r / 65536.0, old_peak_r / 65536.0 * 180.0 / M_PI,
+ (int)new_peak_r, new_peak_r / 65536.0, new_peak_r / 65536.0 * 180.0 / M_PI);
+
+ /* dump around OLD peak */
+ printf("### OLD peak neighborhood (r16=%d ± 20)\n\n", (int)old_peak_r);
+ printf("| r16 | rad | deg | ref (libm) | OLD result | OLD err %% | NEW result | NEW err %% |\n");
+ printf("|--------|-------------|-----------|----------------|----------------|-------------|----------------|-------------|\n");
+ for (rad16 = old_peak_r - 20; rad16 <= old_peak_r + 20; rad16++) { /* may step outside the -65536..65535 sweep range */
+ double angle = (double)rad16 / 65536.0;
+ double ref = tan(angle);
+ if (fabs(ref) > 500.0) continue; /* unlike the BAM dump, such rows are dropped without a "(pole)" line */
+ double ov = fr2d(fr_tan(rad16, 16), 16);
+ double nv = fr2d(fr_tan32(rad16, 16), 16);
+ printf("| %6d | %11.7f | %9.4f | %14.8f | %14.8f | %11.6f | %14.8f | %11.6f |%s\n",
+ (int)rad16, angle, angle * 180.0 / M_PI, ref, ov, tan_err(ov, ref), nv, tan_err(nv, ref),
+ (rad16 == old_peak_r) ? " <-- OLD PEAK" : (rad16 == new_peak_r) ? " <-- NEW PEAK" : "");
+ }
+
+ if (abs((int)(new_peak_r - old_peak_r)) > 25) { /* second table only when the peaks are more than 25 LSB apart */
+ printf("\n### NEW peak neighborhood (r16=%d ± 20)\n\n", (int)new_peak_r);
+ printf("| r16 | rad | deg | ref (libm) | OLD result | OLD err %% | NEW result | NEW err %% |\n");
+ printf("|--------|-------------|-----------|----------------|----------------|-------------|----------------|-------------|\n");
+ for (rad16 = new_peak_r - 20; rad16 <= new_peak_r + 20; rad16++) {
+ double angle = (double)rad16 / 65536.0;
+ double ref = tan(angle);
+ if (fabs(ref) > 500.0) continue;
+ double ov = fr2d(fr_tan(rad16, 16), 16);
+ double nv = fr2d(fr_tan32(rad16, 16), 16);
+ printf("| %6d | %11.7f | %9.4f | %14.8f | %14.8f | %11.6f | %14.8f | %11.6f |%s\n",
+ (int)rad16, angle, angle * 180.0 / M_PI, ref, ov, tan_err(ov, ref), nv, tan_err(nv, ref),
+ (rad16 == new_peak_r) ? " <-- NEW PEAK" : "");
+ }
+ }
+
+ printf("\n");
+}
+
+/*=======================================================
+ * Degree peak finder + neighborhood dump - FR_TanI (OLD) vs fr_tan_deg32 (NEW) over integer degrees -180..179, one table spanning both peaks
+ */
+static void peak_tan_deg(void)
+{
+ s16 deg;
+ s16 old_peak_d = 0, new_peak_d = 0; /* degrees where each implementation's error peaks */
+ double old_peak = 0, new_peak = 0; /* the peak error values, in percent */
+
+ for (deg = -180; deg <= 179; deg++) {
+ double ref = tan((double)deg * M_PI / 180.0);
+ if (fabs(ref) > 500.0) continue; /* ignore degrees where |tan| exceeds 500 */
+ double ov = fr2d(FR_TanI(deg), 16);
+ double nv = fr2d(fr_tan_deg32(deg, 0), 16);
+ double oe = tan_err(ov, ref);
+ double ne = tan_err(nv, ref);
+ if (oe > old_peak) { old_peak = oe; old_peak_d = deg; } /* strict >: the first degree reaching a peak wins */
+ if (ne > new_peak) { new_peak = ne; new_peak_d = deg; }
+ }
+
+ printf("## tan Degree: OLD peak at %d deg, NEW peak at %d deg\n\n",
+ (int)old_peak_d, (int)new_peak_d);
+
+ /* dump full range around both peaks, ±20 deg */
+ s16 lo = old_peak_d < new_peak_d ? old_peak_d : new_peak_d; /* smaller of the two peak degrees */
+ s16 hi = old_peak_d > new_peak_d ? old_peak_d : new_peak_d; /* larger of the two peak degrees */
+ lo = (lo - 20 < -180) ? -180 : lo - 20; /* widen by 20 deg, clamped to the swept range */
+ hi = (hi + 20 > 179) ? 179 : hi + 20;
+
+ printf("### Neighborhood (%d .. %d deg)\n\n", (int)lo, (int)hi);
+ printf("| deg | ref (libm) | OLD result | OLD err %% | NEW result | NEW err %% |\n");
+ printf("|------|----------------|----------------|-------------|----------------|-------------|\n");
+ for (deg = lo; deg <= hi; deg++) {
+ double ref = tan((double)deg * M_PI / 180.0);
+ if (fabs(ref) > 500.0) { printf("| %4d | (pole) | | | | |\n", (int)deg); continue; }
+ double ov = fr2d(FR_TanI(deg), 16);
+ double nv = fr2d(fr_tan_deg32(deg, 0), 16);
+ printf("| %4d | %14.8f | %14.8f | %11.6f | %14.8f | %11.6f |%s\n",
+ (int)deg, ref, ov, tan_err(ov, ref), nv, tan_err(nv, ref),
+ (deg == old_peak_d && deg == new_peak_d) ? " <-- BOTH PEAK" :
+ (deg == old_peak_d) ? " <-- OLD PEAK" :
+ (deg == new_peak_d) ? " <-- NEW PEAK" : "");
+ }
+ printf("\n");
+}
+
+int main(void) /* prints the three peak-error reports (BAM, radian, degree) in that order; always exits 0 */
+{
+ printf("# Peak Error Neighborhoods for Tangent Functions\n\n");
+ peak_tan_bam();
+ peak_tan_rad();
+ peak_tan_deg();
+ return 0; /* report-only tool: no pass/fail status is derived from the errors */
+}
diff --git a/tests/test_tan32_sweep.c b/tests/test_tan32_sweep.c
new file mode 100644
index 0000000..99dc83f
--- /dev/null
+++ b/tests/test_tan32_sweep.c
@@ -0,0 +1,318 @@
+/*
+ * test_tan32_sweep.c - Comprehensive -65536..+65536 sweep for all tan/atan functions
+ *
+ * Generates a single comparison table: old vs new, BAM / radian / degree,
+ * with peak error, avg error, and speed for each function.
+ *
+ * Compile:
+ * cc -Isrc -Wall -Os src/FR_tan32.c src/FR_math.c tests/test_tan32_sweep.c -lm -o build/test_tan32_sweep
+ *
+ * @author M A Chatterjee
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+#include "../src/FR_math.h"
+
+extern s32 fr_tan_bam32(u16 bam); /* tangent of a BAM angle (this file maps bam to bam*2*pi/65536 rad); result read as radix 16 */
+extern s32 fr_tan32(s32 rad, u16 radix); /* tangent of a fixed-point radian angle; radix = fractional bits of rad */
+extern s32 fr_tan_deg32(s32 deg, u16 radix); /* tangent of a degree angle; called here with radix 0 (integer degrees) */
+extern s32 fr_atan2_32(s32 y, s32 x, u16 out_radix); /* atan2 of fixed-point (y, x); result in radians with out_radix fractional bits */
+
+static double fr2d(s32 val, int radix) { /* fixed-point -> double: val * 2^-radix */
+    return ldexp((double)val, -radix); /* exact power-of-two scaling, same value as val / 2^radix */
+}
+
+typedef struct { /* running error statistics for one implementation over one sweep */
+ double peak_err; /* largest error passed to stats_add so far */
+ double sum_err; /* sum of all errors passed to stats_add (numerator of the average) */
+ int count; /* number of stats_add calls since stats_init */
+} stats_t;
+
+static void stats_init(stats_t *s) { s->count = 0; s->sum_err = 0.0; s->peak_err = 0.0; } /* reset to the empty state */
+static void stats_add(stats_t *s, double err) {
+    s->count += 1; /* one more sample */
+    s->sum_err = s->sum_err + err;
+    s->peak_err = (err > s->peak_err) ? err : s->peak_err; /* only a strictly larger error replaces the peak */
+}
+static double stats_avg(stats_t *s) { return s->count > 0 ? s->sum_err / s->count : 0; }
+
+/*=======================================================
+ * Speed measurement helper - runs fn for n iterations between two clock() reads and returns the average nanoseconds per iteration
+ */
+static double measure_ns(void (*fn)(volatile s32 *sink, int n), int n) {
+ volatile s32 sink = 0; /* volatile accumulator handed to the kernel to consume its results */
+ clock_t start, end;
+ /* warm up */
+ fn(&sink, n / 10); /* untimed run at a tenth of the iteration count */
+ start = clock();
+ fn(&sink, n);
+ end = clock();
+ return (double)(end - start) / CLOCKS_PER_SEC * 1e9 / n; /* clock ticks -> seconds -> ns, divided by n (n must be non-zero) */
+}
+
+/* Speed test functions: each calls one tangent routine n times and adds every result into *sink (kernels for measure_ns) */
+static void speed_tan_bam_old(volatile s32 *sink, int n) {
+ int i; for (i = 0; i < n; i++) *sink += fr_tan_bam((u16)(i & 0xFFFF)); /* input cycles through all 65536 BAM values */
+}
+static void speed_tan_bam_new(volatile s32 *sink, int n) {
+ int i; for (i = 0; i < n; i++) *sink += fr_tan_bam32((u16)(i & 0xFFFF)); /* same inputs as the OLD kernel */
+}
+static void speed_tan_rad_old(volatile s32 *sink, int n) {
+ int i; for (i = 0; i < n; i++) *sink += fr_tan((s32)((i * 7) - n * 3), 16); /* radix-16 input steps by 7 from -3n up to 4n-7 */
+}
+static void speed_tan_rad_new(volatile s32 *sink, int n) {
+ int i; for (i = 0; i < n; i++) *sink += fr_tan32((s32)((i * 7) - n * 3), 16); /* same inputs as the OLD kernel */
+}
+static void speed_tan_deg_old(volatile s32 *sink, int n) {
+ int i; for (i = 0; i < n; i++) *sink += FR_TanI((s16)(i % 360)); /* input cycles through 0..359 degrees */
+}
+static void speed_tan_deg_new(volatile s32 *sink, int n) {
+ int i; for (i = 0; i < n; i++) *sink += fr_tan_deg32((s16)(i % 360), 0); /* same inputs as the OLD kernel */
+}
+
+static s32 g_xs[256], g_ys[256]; /* shared atan2 benchmark inputs: 256 points on a radius-10 circle, scaled by 2^16 */
+static void init_atan_data(void) {
+    int slot; /* table index; entries are independent, so fill order does not matter */
+    for (slot = 255; slot >= 0; slot--) {
+        const double theta = (double)slot * 2.0 * M_PI / 256.0;
+        g_ys[slot] = (s32)(10.0 * sin(theta) * 65536.0);
+        g_xs[slot] = (s32)(10.0 * cos(theta) * 65536.0);
+    }
+}
+static void speed_atan2_old(volatile s32 *sink, int n) { /* timing kernel: n FR_atan2 calls accumulated into *sink */
+ int i; for (i = 0; i < n; i++) *sink += FR_atan2(g_ys[i & 0xFF], g_xs[i & 0xFF], 16); /* i & 0xFF cycles through the 256 shared points */
+}
+static void speed_atan2_new(volatile s32 *sink, int n) { /* timing kernel: n fr_atan2_32 calls on the same points */
+ int i; for (i = 0; i < n; i++) *sink += fr_atan2_32(g_ys[i & 0xFF], g_xs[i & 0xFF], 16);
+}
+static void speed_atan_old(volatile s32 *sink, int n) { /* timing kernel: n FR_atan calls */
+ int i; for (i = 0; i < n; i++) *sink += FR_atan((s32)((i * 13) - n * 6), 16, 16); /* radix-16 input steps by 13 from -6n up to 7n-13 */
+}
+static void speed_atan_new(volatile s32 *sink, int n) { /* timing kernel: n fr_atan2_32(x, 1.0) calls on the same inputs as speed_atan_old */
+    /* FR_atan(x, r, or) = FR_atan2(x, 1<<r, or), so the matching call is fr_atan2_32(x, 1<<16, 16) */
+    int i; for (i = 0; i < n; i++) *sink += fr_atan2_32((s32)((i * 13) - n * 6), 65536, 16);
+}
+
+/*=======================================================
+ * Tan sweeps
+ */
+static void sweep_tan_bam(stats_t *old_s, stats_t *new_s) /* fr_tan_bam (old) vs fr_tan_bam32 (new) against libm tan(), all 65536 BAM inputs */
+{
+    s32 bam; /* s32 rather than u16 so the loop bound 65536 is reachable */
+    stats_init(old_s);
+    stats_init(new_s);
+
+    for (bam = 0; bam < 65536; bam++) {
+        double angle = (double)bam * 2.0 * M_PI / 65536.0; /* BAM -> radians */
+        double ref = tan(angle);
+        double ov, nv, oe, ne;
+        if (fabs(ref) > 500.0) continue; /* skip inputs where |tan| exceeds 500 (near the poles) */
+
+        ov = fr2d(fr_tan_bam((u16)bam), 16);
+        nv = fr2d(fr_tan_bam32((u16)bam), 16);
+
+        if (fabs(ref) > 0.001) { /* relative error in percent */
+            oe = fabs((ov - ref) / ref) * 100.0;
+            ne = fabs((nv - ref) / ref) * 100.0;
+        } else { /* near zero: absolute error * 100, avoiding a tiny divisor */
+            oe = fabs(ov - ref) * 100.0;
+            ne = fabs(nv - ref) * 100.0;
+        }
+        stats_add(old_s, oe);
+        stats_add(new_s, ne);
+    }
+}
+
+static void sweep_tan_rad(stats_t *old_s, stats_t *new_s)
+{
+    /* fr_tan (old) vs fr_tan32 (new) against libm tan() for every s15.16 radian input in
+     * -65536..+65535 (-1.0 to +1.0 rad, about +/-57 deg): 131072 points, step 1 LSB. */
+    s32 next = -65536;
+    stats_init(old_s);
+    stats_init(new_s);
+
+    while (next <= 65535) {
+        const s32 rad16 = next++; /* advance first so `continue` cannot stall the loop */
+        const double angle = (double)rad16 / 65536.0;
+        const double ref = tan(angle);
+        const int relative = fabs(ref) > 0.001; /* relative %, else absolute * 100 near zero */
+        double got_old, got_new;
+
+        if (fabs(ref) > 500.0) continue; /* skip references beyond |tan| = 500 */
+
+        /* Both results are read at radix 16 and converted to double for comparison. */
+        got_old = fr2d(fr_tan(rad16, 16), 16);
+        got_new = fr2d(fr_tan32(rad16, 16), 16);
+
+        /* Fold each percent error into its accumulator. */
+        stats_add(old_s, relative ? fabs((got_old - ref) / ref) * 100.0
+                                  : fabs(got_old - ref) * 100.0);
+        stats_add(new_s, relative ? fabs((got_new - ref) / ref) * 100.0
+                                  : fabs(got_new - ref) * 100.0);
+    }
+}
+
+static void sweep_tan_deg(stats_t *old_s, stats_t *new_s)
+{
+    /* FR_TanI (old) vs fr_tan_deg32 (new) against libm tan() at integer degrees -180..+179. */
+    s16 next = -180;
+    stats_init(old_s);
+    stats_init(new_s);
+
+    while (next <= 179) {
+        const s16 deg = next++; /* advance first so `continue` cannot stall the loop */
+        const double ref = tan((double)deg * M_PI / 180.0);
+        const int relative = fabs(ref) > 0.001; /* relative %, else absolute * 100 near zero */
+        double got_old, got_new;
+
+        if (fabs(ref) > 500.0) continue; /* skip degrees where |tan| exceeds 500 */
+
+        got_old = fr2d(FR_TanI(deg), 16);
+        got_new = fr2d(fr_tan_deg32(deg, 0), 16);
+
+        /* Fold each percent error into its accumulator. */
+        stats_add(old_s, relative ? fabs((got_old - ref) / ref) * 100.0
+                                  : fabs(got_old - ref) * 100.0);
+        stats_add(new_s, relative ? fabs((got_new - ref) / ref) * 100.0
+                                  : fabs(got_new - ref) * 100.0);
+    }
+}
+
+/*=======================================================
+ * Atan sweeps
+ */
+static void sweep_atan2(stats_t *old_s, stats_t *new_s)
+{
+    /* FR_atan2 (old) vs fr_atan2_32 (new) against libm atan2() on 5 circles x 65536 angles; error in % of pi. */
+    static const double radii[] = { 0.1, 1.0, 10.0, 100.0, 1000.0 };
+    int ring, step;
+    stats_init(old_s);
+    stats_init(new_s);
+
+    for (ring = 0; ring < 5; ring++) {
+        for (step = 0; step < 65536; step++) {
+            const double angle = (double)step * 2.0 * M_PI / 65536.0 - M_PI;
+            const double fx = radii[ring] * cos(angle);
+            const double fy = radii[ring] * sin(angle);
+            const s32 x = (s32)(fx * 65536.0); /* point scaled by 2^16; the cast truncates */
+            const s32 y = (s32)(fy * 65536.0);
+            const double ref = atan2(fy, fx); /* reference uses the unquantized point */
+            double err_old, err_new;
+
+            if (x == 0 && y == 0) continue; /* degenerate: both coordinates truncated to 0 */
+
+            err_old = fabs(fr2d(FR_atan2(y, x, 16), 16) - ref);
+            err_new = fabs(fr2d(fr_atan2_32(y, x, 16), 16) - ref);
+            /* A difference above pi means the two angles straddle the +/-pi cut; use 2*pi - diff. */
+            if (err_old > M_PI) err_old = 2.0 * M_PI - err_old;
+            if (err_new > M_PI) err_new = 2.0 * M_PI - err_new;
+
+            stats_add(old_s, err_old / M_PI * 100.0);
+            stats_add(new_s, err_new / M_PI * 100.0);
+        }
+    }
+}
+
+static void sweep_atan(stats_t *old_s, stats_t *new_s)
+{
+    /* FR_atan(x, 16, 16) (old) vs fr_atan2_32(x, 65536, 16) (new) against libm atan() for
+     * s15.16 inputs from -65536 (= -1.0) upward in steps of 8: 16384 points, the last one 65528.
+     * Error metric: absolute angular error as a percentage of pi/2 (the atan range). */
+    const double half_pi = M_PI / 2.0;
+    s32 x16 = -65536;
+    stats_init(old_s);
+    stats_init(new_s);
+
+    do {
+        const double xf = (double)x16 / 65536.0;
+        const double ref = atan(xf);
+        double got_old, got_new;
+
+        got_old = fr2d(FR_atan(x16, 16, 16), 16);
+        got_new = fr2d(fr_atan2_32(x16, 65536, 16), 16);
+
+        /* No branch-cut handling is needed here: atan stays within (-pi/2, +pi/2). */
+        stats_add(old_s, fabs(got_old - ref) / half_pi * 100.0);
+        stats_add(new_s, fabs(got_new - ref) / half_pi * 100.0);
+
+        x16 += 8;
+    } while (x16 <= 65535);
+}
+
+/*=======================================================
+ * Main - for each function pair: run the accuracy sweep, time OLD and NEW with measure_ns, print two markdown table rows; always exits 0
+ */
+int main(void)
+{
+ stats_t old_s, new_s; /* reused for every pair; each sweep_* call resets both */
+ double old_ns, new_ns; /* ns/call for the OLD and NEW implementation of the current pair */
+ int N = 1000000; /* timing iterations for the tan kernels; the atan kernels use N / 2 */
+
+ init_atan_data(); /* fills g_xs/g_ys, which the atan2 speed kernels read */
+
+ printf("# Comprehensive Function Comparison: Old vs New\n\n");
+ printf("Sweep range: full domain for each input type\n");
+ printf("Error metric: relative %% (or absolute*100 near zero)\n");
+ printf("Speed: ns/call on this platform\n\n");
+
+ printf("## Tangent Functions\n\n");
+ printf("| Function | Impl | Sweep Range | Points | Peak Err %% | Avg Err %% | ns/call |\n");
+ printf("|--------------------|-------|-------------------|---------|-------------|-------------|--------:|\n");
+
+ sweep_tan_bam(&old_s, &new_s);
+ old_ns = measure_ns(speed_tan_bam_old, N);
+ new_ns = measure_ns(speed_tan_bam_new, N);
+ printf("| tan_bam (BAM) | OLD | 0..65535 BAM | %7d | %11.6f | %11.6f | %5.1f |\n",
+ old_s.count, old_s.peak_err, stats_avg(&old_s), old_ns);
+ printf("| tan_bam32 (BAM) | NEW | 0..65535 BAM | %7d | %11.6f | %11.6f | %5.1f |\n",
+ new_s.count, new_s.peak_err, stats_avg(&new_s), new_ns);
+
+ sweep_tan_rad(&old_s, &new_s);
+ old_ns = measure_ns(speed_tan_rad_old, N);
+ new_ns = measure_ns(speed_tan_rad_new, N);
+ printf("| fr_tan (rad@r16) | OLD | -65536..+65535 r16| %7d | %11.6f | %11.6f | %5.1f |\n",
+ old_s.count, old_s.peak_err, stats_avg(&old_s), old_ns);
+ printf("| fr_tan32 (rad@r16) | NEW | -65536..+65535 r16| %7d | %11.6f | %11.6f | %5.1f |\n",
+ new_s.count, new_s.peak_err, stats_avg(&new_s), new_ns);
+
+ sweep_tan_deg(&old_s, &new_s);
+ old_ns = measure_ns(speed_tan_deg_old, N);
+ new_ns = measure_ns(speed_tan_deg_new, N);
+ printf("| FR_TanI (deg) | OLD | -180..+179 deg | %7d | %11.6f | %11.6f | %5.1f |\n",
+ old_s.count, old_s.peak_err, stats_avg(&old_s), old_ns);
+ printf("| fr_tan_deg32 (deg) | NEW | -180..+179 deg | %7d | %11.6f | %11.6f | %5.1f |\n",
+ new_s.count, new_s.peak_err, stats_avg(&new_s), new_ns);
+
+ printf("\n## Inverse Tangent Functions\n\n");
+ printf("| Function | Impl | Sweep Range | Points | Peak Err %% | Avg Err %% | ns/call |\n");
+ printf("|--------------------|-------|-------------------|---------|-------------|-------------|--------:|\n");
+
+ sweep_atan2(&old_s, &new_s);
+ old_ns = measure_ns(speed_atan2_old, N / 2);
+ new_ns = measure_ns(speed_atan2_new, N / 2);
+ printf("| FR_atan2 (s15.16) | OLD | 5 radii x 65536 | %7d | %11.6f | %11.6f | %5.1f |\n",
+ old_s.count, old_s.peak_err, stats_avg(&old_s), old_ns);
+ printf("| fr_atan2_32(s15.16)| NEW | 5 radii x 65536 | %7d | %11.6f | %11.6f | %5.1f |\n",
+ new_s.count, new_s.peak_err, stats_avg(&new_s), new_ns);
+
+ sweep_atan(&old_s, &new_s);
+ old_ns = measure_ns(speed_atan_old, N / 2);
+ new_ns = measure_ns(speed_atan_new, N / 2);
+ printf("| FR_atan (s15.16) | OLD | -65536..+65535 /8 | %7d | %11.6f | %11.6f | %5.1f |\n",
+ old_s.count, old_s.peak_err, stats_avg(&old_s), old_ns);
+ printf("| atan2_32(x,1) eq. | NEW | -65536..+65535 /8 | %7d | %11.6f | %11.6f | %5.1f |\n",
+ new_s.count, new_s.peak_err, stats_avg(&new_s), new_ns);
+
+ printf("\n## Notes\n\n");
+ printf("- BAM sweep: 0..65535 (full circle, excludes |tan|>500 near poles)\n");
+ printf("- Radian sweep: -65536..+65535 at radix 16 = -1.0..+1.0 rad = +/-57.3 deg\n");
+ printf("- Degree sweep: -180..+179 integer degrees\n");
+ printf("- atan2 error: %% of pi (angular error / pi * 100)\n");
+ printf("- atan error: absolute angular error / (pi/2) * 100%%\n");
+ printf("- atan2_32(x,1) is used as the NEW atan since it's equivalent to atan(x)\n");
+
+ return 0; /* report-only tool: no pass/fail status is derived from the errors */
+}
diff --git a/tests/test_tdd.cpp b/tests/test_tdd.cpp
index 5a70a0a..3daaff9 100644
--- a/tests/test_tdd.cpp
+++ b/tests/test_tdd.cpp
@@ -89,8 +89,7 @@ static void stats_add(stats_t *s, double in, double actual, double expected) {
s->worst_expected = expected;
}
s->sum_abs_err += e;
- /* Skip percent error when expected ≈ 0 to avoid division artifacts */
- double pct = (fabs(expected) > 0.01) ? (e / fabs(expected)) * 100.0 : 0.0;
+ double pct = (expected != 0.0) ? (e / fabs(expected)) * 100.0 : (e != 0.0 ? 100.0 : 0.0);
if (pct > s->max_pct_err) {
s->max_pct_err = pct;
s->worst_pct_input = in;
@@ -109,6 +108,21 @@ static double stats_mean_pct(const stats_t *s) {
return s->n ? s->sum_pct_err / s->n : 0.0;
}
+/* Snap a double onto the s15.16 grid (multiples of 1/65536, the library's output resolution); halves round toward +infinity. */
+static inline double q16(double x) {
+    return ldexp(floor(ldexp(x, 16) + 0.5), -16); /* scale by 2^16, round, scale back; power-of-two scaling is exact */
+}
+
+/* Largest s15.16 value (0x7fffffff / 2^16) as a double; tan_ref saturates libm tan() to +/- this. */
+static const double TAN_CLAMP = (double)0x7fffffff / (double)(1L << 16);
+
+static double tan_ref(double rad) {
+    /* Reference tangent for the accuracy tables: libm tan(rad) limited to [-TAN_CLAMP, +TAN_CLAMP]. */
+    const double raw = tan(rad);
+    const double capped_hi = (raw > TAN_CLAMP) ? TAN_CLAMP : raw;
+    return (capped_hi < -TAN_CLAMP) ? -TAN_CLAMP : capped_hi;
+}
+
/* Set by FR_SHOWPEAK env var — adds a "Peak at" column to the accuracy table */
static int g_showpeak = 0;
@@ -1725,10 +1739,11 @@ static void section_summary(void) {
printf("| FR_FixMulSat | OK | 4.2, 4.3 | int64 fast path with round-to-nearest and explicit saturation |\n");
printf("| FR_FixAddSat | OK | 4.4, 4.5 | Saturation behaves identically on LP64 host and ILP32 MCU |\n");
printf("| FR_CosI / FR_SinI | OK | 5 | s15.16 output; exact at poles; max abs error ~1.5e-5 (1 LSB s15.16) over [-720, +720]; macros routing to fr_*_bam |\n");
- printf("| FR_TanI (integer degrees) | OK | 5.1, 5.2 | Routed through BAM trig |\n");
+ printf("| FR_TanI (integer degrees) | OK | 5.1, 5.2 | BAM table lookup; 65-entry octant table; no 64-bit division |\n");
printf("| FR_Cos / FR_Sin (interpolated) | OK | 6.1 | Within LSB-level error for r8 inputs in s16 |\n");
- printf("| FR_Tan (interpolated) | OK | 6.2 | Locals are s32 |\n");
+ printf("| FR_Tan (interpolated) | OK | 6.2 | Via fr_tan_bam; 65-entry octant table |\n");
printf("| fr_cos / fr_sin / fr_cos_bam / fr_sin_bam / fr_cos_deg / fr_sin_deg | OK | 6 | s15.16 output; 129-entry quadrant table with round-to-nearest linear interp; exact at cardinal angles |\n");
+ printf("| fr_tan_bam | OK | 14 | 65-entry octant table; first-octant lerp, second-octant 32-bit reciprocal; no 64-bit |\n");
printf("| FR_acos | OK | 7.1 | Max error ~0.83° over [-1, +1] swept at 200 points |\n");
printf("| FR_asin | OK | 7.2 | Same precision as FR_acos |\n");
printf("| FR_atan2 | OK | 7.3 | Via asin/acos + hypot_fast8; 129-entry cos table; `FR_atan2(y, x, out_radix)` returns radians |\n");
@@ -1796,51 +1811,98 @@ static void section_accuracy_table(void) {
/* Persistent stats so we can print diagnostics after the table */
stats_t st_sincos, st_tan, st_asincos, st_atan2;
+ stats_t st_rad2bam, st_deg2bam, st_sincos_deg_s32, st_tan_deg_s32;
stats_reset(&st_sincos); stats_reset(&st_tan);
stats_reset(&st_asincos); stats_reset(&st_atan2);
+ stats_reset(&st_rad2bam); stats_reset(&st_deg2bam);
+ stats_reset(&st_sincos_deg_s32); stats_reset(&st_tan_deg_s32);
+
+ /* --- sin / cos (BAM native: 65536-pt) --- */
+ {
+ stats_t st; stats_reset(&st);
+ for (int i = 0; i < 65536; i++) {
+ u16 bam = (u16)i;
+ double rad = bam * 2.0 * M_PI / 65536.0;
+ stats_add(&st, (double)bam, frd(fr_sin_bam(bam), FR_TRIG_OUT_PREC), q16(sin(rad)));
+ stats_add(&st, (double)bam, frd(fr_cos_bam(bam), FR_TRIG_OUT_PREC), q16(cos(rad)));
+ }
+ acc_row("sin/cos (BAM)", &st, "fr_sin_bam/fr_cos_bam direct; 129-entry table");
+ }
- /* --- sin / cos --- */
+ /* --- sin / cos (degree wrappers: 65536-pt) --- */
{
stats_t &st = st_sincos;
const u16 radix = 7; /* s8.7 degrees: 128 steps/deg, [-256°,+256°) */
- /* 65536-point sweep: all s16 values at radix 7 cover > full circle */
for (int i = -32768; i <= 32767; i++) {
double deg = (double)i / (1 << radix);
double rad = deg * M_PI / 180.0;
- stats_add(&st, deg, frd(FR_Sin((s16)i, radix), FR_TRIG_OUT_PREC), sin(rad));
- stats_add(&st, deg, frd(FR_Cos((s16)i, radix), FR_TRIG_OUT_PREC), cos(rad));
+ stats_add(&st, deg, frd(FR_Sin((s16)i, radix), FR_TRIG_OUT_PREC), q16(sin(rad)));
+ stats_add(&st, deg, frd(FR_Cos((s16)i, radix), FR_TRIG_OUT_PREC), q16(cos(rad)));
}
- /* Special cases: exact integer degrees including negative */
s16 specials[] = {0,30,45,60,90,120,135,150,180,210,225,240,270,300,315,330,360,
-30,-45,-60,-90,-120,-135,-150,-180,-210,-225,-240,-270,-300,-315,-330,-360};
for (int si = 0; si < (int)(sizeof(specials)/sizeof(specials[0])); si++) {
s16 d = specials[si];
double rad = d * M_PI / 180.0;
- stats_add(&st, d, frd(FR_SinI(d), FR_TRIG_OUT_PREC), sin(rad));
- stats_add(&st, d, frd(FR_CosI(d), FR_TRIG_OUT_PREC), cos(rad));
+ stats_add(&st, d, frd(FR_SinI(d), FR_TRIG_OUT_PREC), q16(sin(rad)));
+ stats_add(&st, d, frd(FR_CosI(d), FR_TRIG_OUT_PREC), q16(cos(rad)));
+ }
+ acc_row("sin/cos (deg)", &st, "FR_Sin/FR_Cos ±256° (s16 at radix 7; FR_DEG2BAM)");
+ }
+
+ /* --- sin / cos (radian wrappers: 65536-pt) --- */
+ {
+ stats_t st; stats_reset(&st);
+ for (int i = 0; i < 65536; i++) {
+ double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0);
+ s32 rad_fp = (s32)(angle * (1L << 16));
+ stats_add(&st, angle, frd(fr_sin(rad_fp, 16), FR_TRIG_OUT_PREC), q16(sin(angle)));
+ stats_add(&st, angle, frd(fr_cos(rad_fp, 16), FR_TRIG_OUT_PREC), q16(cos(angle)));
+ }
+ acc_row("sin/cos (rad)", &st, "fr_sin/fr_cos via fr_rad_to_bam ±2π r16");
+ }
+
+ /* --- tan (BAM native: 65536-pt, full sweep) --- */
+ {
+ stats_t st; stats_reset(&st);
+ for (int i = 0; i < 65536; i++) {
+ u16 bam = (u16)i;
+ double ref;
+ if (bam == 16384) ref = TAN_CLAMP; /* 90°: +maxint */
+ else if (bam == 49152) ref = -TAN_CLAMP; /* 270°: -maxint */
+ else ref = tan_ref(bam * 2.0 * M_PI / 65536.0);
+ stats_add(&st, (double)bam, frd(fr_tan_bam(bam), FR_TRIG_OUT_PREC), q16(ref));
}
- acc_row("sin / cos", &st, "65536-pt sweep + specials");
+ acc_row("tan (BAM)", &st, "fr_tan_bam 65536-pt full; ±maxint at poles");
}
- /* --- tan --- */
+ /* --- tan (degree wrappers: 65536-pt, full sweep) --- */
{
stats_t &st = st_tan;
const u16 radix = 7;
for (int i = -32768; i <= 32767; i++) {
double deg = (double)i / (1 << radix);
double rad = deg * M_PI / 180.0;
- /* Skip near poles: |cos| < 0.01 → tan > 100 */
- if (fabs(cos(rad)) < 0.01) continue;
- stats_add(&st, deg, frd(FR_Tan((s16)i, radix), FR_TRIG_OUT_PREC), tan(rad));
+ stats_add(&st, deg, frd(FR_Tan((s16)i, radix), FR_TRIG_OUT_PREC), q16(tan_ref(rad)));
}
- /* Special cases: integer degrees (avoiding poles) */
s16 specials[] = {0,30,45,60,-30,-45,-60,120,135,150,-120,-135,-150};
for (int si = 0; si < (int)(sizeof(specials)/sizeof(specials[0])); si++) {
s16 d = specials[si];
double rad = d * M_PI / 180.0;
- stats_add(&st, d, frd(FR_TanI(d), FR_TRIG_OUT_PREC), tan(rad));
+ stats_add(&st, d, frd(FR_TanI(d), FR_TRIG_OUT_PREC), q16(tan_ref(rad)));
}
- acc_row("tan", &st, "65536-pt sweep (skip poles)");
+ acc_row("tan (deg)", &st, "FR_Tan ±256° full (s16 at radix 7; FR_DEG2BAM); sat at poles");
+ }
+
+ /* --- tan (radian wrappers: 65536-pt, full sweep) --- */
+ {
+ stats_t st; stats_reset(&st);
+ for (int i = 0; i < 65536; i++) {
+ double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0);
+ s32 rad_fp = (s32)(angle * (1L << 16));
+ stats_add(&st, angle, frd(fr_tan(rad_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(angle)));
+ }
+ acc_row("tan (rad)", &st, "fr_tan ±2π r16 full; sat at poles");
}
/* --- asin / acos --- */
@@ -1851,9 +1913,9 @@ static void section_accuracy_table(void) {
double xd = (double)i / (1 << 15);
if (xd < -1.0 || xd > 1.0) continue;
s32 rad = FR_asin((s32)i, 15, R);
- stats_add(&st, xd, frd(rad, R), asin(xd));
+ stats_add(&st, xd, frd(rad, R), q16(asin(xd)));
rad = FR_acos((s32)i, 15, R);
- stats_add(&st, xd, frd(rad, R), acos(xd));
+ stats_add(&st, xd, frd(rad, R), q16(acos(xd)));
}
acc_row("asin / acos", &st, "65536-pt; sqrt approx near boundary");
}
@@ -1886,7 +1948,7 @@ static void section_accuracy_table(void) {
/* Skip near ±pi branch cut: sign depends on sub-LSB
* input quantization, not algorithm accuracy. */
if (fabs(fabs(ref) - M_PI) < 0.01) continue;
- stats_add(&st, angle * 180.0 / M_PI, frd(r, R), ref);
+ stats_add(&st, angle * 180.0 / M_PI, frd(r, R), q16(ref));
}
}
/* Special cases: exact quadrant/octant/30-degree angles */
@@ -1898,7 +1960,7 @@ static void section_accuracy_table(void) {
s32 fx = (s32)(x * scale), fy = (s32)(y * scale);
if (fx == 0 && fy == 0) continue;
s32 r = FR_atan2(fy, fx, R);
- stats_add(&st, specials_deg[si], frd(r, R), atan2(y, x));
+ stats_add(&st, specials_deg[si], frd(r, R), q16(atan2(y, x)));
}
acc_row("atan2", &st, "65536x5 radii; asin/acos+hypot_fast8");
}
@@ -1906,18 +1968,14 @@ static void section_accuracy_table(void) {
/* --- atan --- */
{
stats_t st; stats_reset(&st);
- /* Sweep atan(x) for x in [-10, 10] with fine steps near zero.
- * FR_atan(input, radix, out_radix) calls FR_atan2(input, 1< 32000.0 || ref < 1e-6) continue; /* skip overflow/underflow */
- stats_add(&st, x, frd(r, R), ref);
+ stats_add(&st, x, frd(r, R), q16(ref));
}
acc_row("exp", &st, "FR_MULK28 + FR_pow2");
}
@@ -2013,7 +2071,7 @@ static void section_accuracy_table(void) {
s32 r = FR_EXP_FAST(fr, R);
double ref = exp(x);
if (ref > 32000.0 || ref < 1e-6) continue;
- stats_add(&st, x, frd(r, R), ref);
+ stats_add(&st, x, frd(r, R), q16(ref));
}
acc_row("exp_fast", &st, "Shift-only scaling");
}
@@ -2027,7 +2085,7 @@ static void section_accuracy_table(void) {
s32 r = FR_POW10(fr, R);
double ref = pow(10.0, x);
if (ref > 32000.0 || ref < 1e-6) continue;
- stats_add(&st, x, frd(r, R), ref);
+ stats_add(&st, x, frd(r, R), q16(ref));
}
acc_row("pow10", &st, "FR_MULK28 + FR_pow2");
}
@@ -2041,7 +2099,7 @@ static void section_accuracy_table(void) {
s32 r = FR_POW10_FAST(fr, R);
double ref = pow(10.0, x);
if (ref > 32000.0 || ref < 1e-6) continue;
- stats_add(&st, x, frd(r, R), ref);
+ stats_add(&st, x, frd(r, R), q16(ref));
}
acc_row("pow10_fast", &st, "Shift-only scaling");
}
@@ -2058,7 +2116,7 @@ static void section_accuracy_table(void) {
s32 fy = (s32)(cases[i].y * scale);
s32 r = FR_hypot(fx, fy, R);
double ref = hypot(cases[i].x, cases[i].y);
- stats_add(&st, ref, frd(r, R), ref);
+ stats_add(&st, ref, frd(r, R), q16(ref));
}
acc_row("hypot (exact)", &st, "64-bit intermediate");
}
@@ -2075,7 +2133,7 @@ static void section_accuracy_table(void) {
s32 fy = (s32)(cases[i].y * scale);
s32 r = FR_hypot_fast8(fx, fy);
double ref = hypot(cases[i].x, cases[i].y);
- if (ref > 0) stats_add(&st, ref, frd(r, R), ref);
+ if (ref > 0) stats_add(&st, ref, frd(r, R), q16(ref));
}
acc_row("hypot_fast8 (8-seg)", &st, "Shift-only, no multiply");
}
@@ -2083,6 +2141,175 @@ static void section_accuracy_table(void) {
printf("\n");
printf("\n");
+ /* ── Test-only rows (not library functions — conversion & pipeline checks) ── */
+ md_h3("14.0.1 Conversion & pipeline accuracy (test-only)");
+ printf("| Function | Max err (%%) | Avg err (%%) | Note |\n");
+ printf("|---|---:|---:|---|\n");
+
+ /* --- rad→BAM conversion (standalone: 65536-pt) --- */
+ {
+ stats_t &st = st_rad2bam;
+ for (int i = 0; i < 65536; i++) {
+ double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0);
+ s32 rad_fp = (s32)(angle * scale);
+ u16 got = fr_rad_to_bam(rad_fp, 16);
+ /* Exact BAM: wrap to u16 */
+ double exact_bam_d = angle * 65536.0 / (2.0 * M_PI);
+ s32 exact_bam_s = (s32)floor(exact_bam_d + 0.5);
+ u16 expected = (u16)(exact_bam_s & 0xFFFF);
+ /* Feed stats as degrees so the error is interpretable */
+ double got_deg = got * (360.0 / 65536.0);
+ double exp_deg = expected * (360.0 / 65536.0);
+ stats_add(&st, angle, got_deg, exp_deg);
+ }
+ {
+ char note[128];
+ snprintf(note, sizeof(note),
+ "fr_rad_to_bam() ±2π at r16; max %d BAM LSB",
+ (int)(st.max_abs_err / (360.0 / 65536.0) + 0.5));
+ acc_row("rad→BAM conv", &st, note);
+ }
+ }
+
+ /* --- deg→BAM conversion (standalone: 65536-pt) --- */
+ {
+ stats_t &st = st_deg2bam;
+ for (int i = 0; i < 65536; i++) {
+ double deg = -360.0 + (720.0 * i / 65536.0);
+ s32 deg_fp = (s32)(deg * scale);
+ u16 got = fr_deg_to_bam(deg_fp, 16);
+ /* Exact BAM: wrap to u16 */
+ double exact_bam_d = deg * 65536.0 / 360.0;
+ s32 exact_bam_s = (s32)floor(exact_bam_d + 0.5);
+ u16 expected = (u16)(exact_bam_s & 0xFFFF);
+ double got_deg = got * (360.0 / 65536.0);
+ double exp_deg = expected * (360.0 / 65536.0);
+ stats_add(&st, deg, got_deg, exp_deg);
+ }
+ {
+ char note[128];
+ snprintf(note, sizeof(note),
+ "fr_deg_to_bam() ±360° at r16; max %d BAM LSB",
+ (int)(st.max_abs_err / (360.0 / 65536.0) + 0.5));
+ acc_row("deg→BAM conv", &st, note);
+ }
+ }
+
+ /* --- sin / cos via integer degrees ±360° --- */
+ {
+ stats_t &st = st_sincos_deg_s32;
+ for (int deg = -360; deg <= 360; deg++) {
+ double rad = deg * M_PI / 180.0;
+ stats_add(&st, (double)deg, frd(fr_sin_deg(deg), FR_TRIG_OUT_PREC), q16(sin(rad)));
+ stats_add(&st, (double)deg, frd(fr_cos_deg(deg), FR_TRIG_OUT_PREC), q16(cos(rad)));
+ }
+ acc_row("sin/cos (int deg)", &st, "fr_sin_deg/fr_cos_deg ±360° integer degrees");
+ }
+
+ /* --- tan via integer degrees ±360° --- */
+ {
+ stats_t &st = st_tan_deg_s32;
+ for (int deg = -360; deg <= 360; deg++) {
+ double rad = deg * M_PI / 180.0;
+ stats_add(&st, (double)deg, frd(FR_TanI((s16)deg), FR_TRIG_OUT_PREC), q16(tan_ref(rad)));
+ }
+ acc_row("tan (int deg)", &st, "FR_TanI ±360° full; sat at poles");
+ }
+
+ /* --- Conversion macro accuracy (all 6 direction macros) --- */
+
+ /* FR_RAD2BAM macro: test within safe range (±pi at r16) */
+ {
+ stats_t st; stats_reset(&st);
+ for (int i = 0; i < 65536; i++) {
+ double angle = -M_PI + (2.0 * M_PI * i / 65536.0);
+ s32 rad_fp = (s32)(angle * scale);
+ s32 raw = FR_RAD2BAM(rad_fp);
+ u16 got = (u16)((raw + (1 << 15)) >> 16);
+ double exact_d = angle * 65536.0 / (2.0 * M_PI);
+ u16 expected = (u16)((s32)floor(exact_d + 0.5) & 0xFFFF);
+ double got_deg = got * (360.0 / 65536.0);
+ double exp_deg = expected * (360.0 / 65536.0);
+ stats_add(&st, angle, got_deg, exp_deg);
+ }
+ acc_row("FR_RAD2BAM macro", &st, "Shift-approx ±π at r16; overflows beyond ±4 rad");
+ }
+
+ /* FR_DEG2BAM macro: test within safe range (±180° at r7) */
+ {
+ stats_t st; stats_reset(&st);
+ const u16 radix = 7;
+ for (int i = -23040; i <= 23040; i++) { /* ±180° at r7 = ±23040 */
+ double deg = (double)i / (1 << radix);
+ s32 raw = FR_DEG2BAM((s32)i);
+ u16 got = (u16)((raw + (1 << (radix - 1))) >> radix);
+ double exact_d = deg * 65536.0 / 360.0;
+ u16 expected = (u16)((s32)floor(exact_d + 0.5) & 0xFFFF);
+ double got_deg = got * (360.0 / 65536.0);
+ double exp_deg = expected * (360.0 / 65536.0);
+ stats_add(&st, deg, got_deg, exp_deg);
+ }
+ acc_row("FR_DEG2BAM macro", &st, "Shift-approx ±180° at r7; overflows beyond ±256°");
+ }
+
+ /* FR_BAM2RAD macro: multiplies by 2π/65536 using shifts.
+ * BAM 0..32767 at r16 (upper half overflows s32 when <<16). */
+ {
+ stats_t st; stats_reset(&st);
+ for (int i = 0; i < 32768; i++) {
+ s32 bam_r16 = (s32)i << 16;
+ s32 rad_fp = FR_BAM2RAD(bam_r16);
+ double got_rad = frd(rad_fp, 16);
+ double exp_rad = (double)i * 2.0 * M_PI / 65536.0;
+ stats_add(&st, (double)i, got_rad, exp_rad);
+ }
+ acc_row("FR_BAM2RAD macro", &st, "BAM→rad r16 full (0..32767; <<16 overflow above)");
+ }
+
+ /* FR_BAM2DEG macro: multiplies by 360/65536 using shifts.
+ * BAM 0..32767 at r16 (same s32 overflow limit). */
+ {
+ stats_t st; stats_reset(&st);
+ for (int i = 0; i < 32768; i++) {
+ s32 bam_r16 = (s32)i << 16;
+ s32 deg_fp = FR_BAM2DEG(bam_r16);
+ double got_deg = frd(deg_fp, 16);
+ double exp_deg = (double)i * 360.0 / 65536.0;
+ stats_add(&st, (double)i, got_deg, exp_deg);
+ }
+ acc_row("FR_BAM2DEG macro", &st, "BAM→deg r16 full (0..32767; <<16 overflow above)");
+ }
+
+ /* FR_DEG2RAD macro: 65536-pt ±360° at r16 full */
+ {
+ stats_t st; stats_reset(&st);
+ for (int i = 0; i < 65536; i++) {
+ double deg = -360.0 + (720.0 * i / 65536.0);
+ s32 deg_fp = (s32)(deg * scale);
+ s32 rad_fp = FR_DEG2RAD(deg_fp);
+ double got_rad = frd(rad_fp, 16);
+ double exp_rad = deg * M_PI / 180.0;
+ stats_add(&st, deg, got_rad, exp_rad);
+ }
+ acc_row("FR_DEG2RAD macro", &st, "65536-pt ±360° r16 full");
+ }
+
+ /* FR_RAD2DEG macro: 65536-pt ±2π at r16 full */
+ {
+ stats_t st; stats_reset(&st);
+ for (int i = 0; i < 65536; i++) {
+ double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0);
+ s32 rad_fp = (s32)(angle * scale);
+ s32 deg_fp = FR_RAD2DEG(rad_fp);
+ double got_deg = frd(deg_fp, 16);
+ double exp_deg = angle * 180.0 / M_PI;
+ stats_add(&st, angle, got_deg, exp_deg);
+ }
+ acc_row("FR_RAD2DEG macro", &st, "65536-pt ±2π r16 full");
+ }
+
+ printf("\n");
+
/* Diagnostic: show where each trig function's worst % error occurs */
md_h3("14.1 Worst-case percent error diagnostics");
printf("Shows the input that produced the maximum %% error for each trig function.\n");
@@ -2092,10 +2319,14 @@ static void section_accuracy_table(void) {
printf("|---|---|---:|---:|---:|---:|\n");
struct { const char *name; stats_t *s; } diag[] = {
- {"sin / cos", &st_sincos},
- {"tan", &st_tan},
- {"asin/acos", &st_asincos},
- {"atan2", &st_atan2},
+ {"sin / cos", &st_sincos},
+ {"tan", &st_tan},
+ {"rad→BAM conv", &st_rad2bam},
+ {"deg→BAM conv", &st_deg2bam},
+ {"sin/cos (int deg)",&st_sincos_deg_s32},
+ {"tan (int deg)", &st_tan_deg_s32},
+ {"asin/acos", &st_asincos},
+ {"atan2", &st_atan2},
};
for (int d = 0; d < (int)(sizeof(diag)/sizeof(diag[0])); d++) {
stats_t *s = diag[d].s;
From 78fcf966609acef6b4f27b0f7fe9dd044be952ea Mon Sep 17 00:00:00 2001
From: deftio
Date: Wed, 29 Apr 2026 20:50:35 -0700
Subject: [PATCH 2/7] updated tan bam
---
src/FR_math.c | 33 ++++++++++++++++++++++++---------
1 file changed, 24 insertions(+), 9 deletions(-)
diff --git a/src/FR_math.c b/src/FR_math.c
index dce131b..3a7a1d4 100644
--- a/src/FR_math.c
+++ b/src/FR_math.c
@@ -161,8 +161,17 @@ s32 fr_tan_bam(u16 bam)
lo = (s32)gFR_TAN_TAB_O[idx];
hi = (s32)gFR_TAN_TAB_O[idx + 1];
raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
- /* raw is in u0.15. Shift to s15.16. */
- raw <<= 1;
+
+ if (raw < 0x40) {
+ /* Near zero: redo interpolation with 4 extra bits of
+ * precision to reduce rounding error when result is small. */
+ s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4;
+ s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4;
+ raw = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+ raw = (raw + 4) >> 3; /* u0.19 → s15.16 with rounding */
+ } else {
+ raw <<= 1; /* u0.15 → s15.16 */
+ }
} else {
/* Second octant: tan(x) = 1 / tan(90° - x).
* complement is in (0, 0x2000] = (0°, 45°]. */
@@ -175,13 +184,19 @@ s32 fr_tan_bam(u16 bam)
hi = (s32)gFR_TAN_TAB_O[idx + 1];
raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
- /* raw is tan(complement) in u0.15. Compute 1/raw in s15.16.
- * 1.0 in s15.16 = 0x10000. We want (1<<16) / (raw_in_0.15)
- * = (1<<16) * (1<<15) / raw_raw = (1<<31) / raw.
- * Use unsigned to avoid overflow: 0x80000000 / raw. */
- if (raw < 2) {
- /* Near pole: saturate */
- raw = FR_TRIG_MAXVAL;
+ if (raw < 0x40) {
+ /* Near pole: redo interpolation with 4 extra bits of
+ * precision. The reciprocal amplifies small interpolation
+ * errors, so extra precision significantly helps here.
+ * Result: (2^31 / raw_hp) << 4 = 2^35 / raw_hp. */
+ s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4;
+ s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4;
+ s32 raw_hp = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+ if (raw_hp < 32) {
+ raw = FR_TRIG_MAXVAL;
+ } else {
+ raw = (s32)((0x80000000u / (u32)raw_hp) << 4);
+ }
} else {
raw = (s32)(0x80000000u / (u32)raw);
}
From 1104f2eeeb163ee607331b2a7ce07f91e7bc55ac Mon Sep 17 00:00:00 2001
From: deftio
Date: Sun, 3 May 2026 14:53:13 -0700
Subject: [PATCH 3/7] minor docs and accuracy tune up
---
README.md | 64 ++-
compare_lfm/comparison_results.json | 479 ++++++++++++++++
compare_lfm/comparison_summary.md | 42 +-
docs/README.md | 69 +--
docs/api-reference.md | 48 +-
docs/building.md | 2 +-
docs/examples.md | 6 +-
docs/fixed-point-primer.md | 6 +-
keywords.txt | 9 +-
makefile | 17 +
pages/guide/api-reference.html | 45 +-
pages/guide/building.html | 2 +-
pages/guide/examples.html | 7 +-
pages/guide/fixed-point-primer.html | 6 +-
pages/index.html | 79 +--
scripts/accuracy_report.sh | 8 +-
src/FR_math.c | 519 +++++++++++++-----
src/FR_math.h | 123 ++---
src/FR_trig_table.h | 50 +-
tests/test_tdd.cpp | 815 +++++++++++++++++++++-------
tools/README.md | 131 +++++
tools/trig_neighborhood.cpp | 519 ++++++++++++++++++
22 files changed, 2458 insertions(+), 588 deletions(-)
create mode 100644 compare_lfm/comparison_results.json
create mode 100644 tools/README.md
create mode 100644 tools/trig_neighborhood.cpp
diff --git a/README.md b/README.md
index 357bd5b..784535c 100644
--- a/README.md
+++ b/README.md
@@ -27,33 +27,34 @@ beyond `<stdint.h>`.
Errors below are measured at Q16.16 (s15.16). All functions accept any
radix — Q16.16 is just the reference point for the table.
-Percent errors skip expected values near zero (|expected| < 0.01).
At other radixes (3-bit, 24-bit, etc.) accuracy will differ due to the
number of fractional bits available. All functions support radix 0 to 30.
-| Function | Max err (%) | Avg err (%) | Note |
-|---|---:|---:|---|
-| sin/cos (BAM) | 0.1646 | 0.0058 | 65536 BAM; 129-entry quadrant table |
-| sin/cos (deg) | 0.5909 | 0.0091 | 65536-pt deg r7 + specials |
-| sin/cos (rad) | 0.1646 | 0.0059 | 65536-pt rad r16 |
-| tan (BAM) | 0.1704 | 0.0065 | 65536 BAM; 65-entry octant table |
-| tan (deg) | 0.6000 | 0.0140 | 65536-pt deg r7 + specials |
-| tan (rad) | 0.1704 | 0.0065 | 65536-pt rad r16 |
-| asin / acos | 1.9776 | 0.0308 | 65536-pt; sqrt approx near boundary |
-| atan2 | 0.4953 | 0.0238 | 65536x5 radii; asin/acos+hypot_fast8 |
-| atan | 0.2985 | 0.0153 | 20001-pt sweep [-10,10]; via FR_atan2 |
-| sqrt | 0.0003 | 0.0000 | Round-to-nearest |
-| log2 | 0.2479 | 0.0045 | 65-entry mantissa table |
-| pow2 | 0.1373 | 0.0057 | 65-entry fraction table |
-| ln, log10 | 0.0015 | 0.0004 | Via FR_MULK28 from log2 |
-| exp | 0.0719 | 0.0051 | FR_MULK28 + FR_pow2 |
-| exp_fast | 0.0719 | 0.0064 | Shift-only scaling |
-| pow10 | 0.1163 | 0.0075 | FR_MULK28 + FR_pow2 |
-| pow10_fast | 0.1163 | 0.0100 | Shift-only scaling |
-| hypot (exact) | 0.0001 | 0.0000 | 64-bit intermediate |
-| hypot_fast8 (8-seg) | 0.0977 | 0.0508 | Shift-only, no multiply |
+| Function | Max err (%)*| Avg err (%) | Peak at | Note |
+|---|---:|---:|---:|---|
+| sin/cos (BAM) | 0.4578 | 0.0076 | 94 | fr_sin_bam/fr_cos_bam direct; 129-entry table |
+| sin/cos (deg) | 0.4578 | 0.0076 | -359.5 | FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM |
+| sin/cos (rad) | 0.6104 | 0.0085 | -4.721 | fr_sin/fr_cos via fr_rad_to_bam ±2π r16 |
+| tan (BAM) | 0.5823 | 0.0008 | 16360 | fr_tan_bam 65536-pt full; ±maxint at poles |
+| tan (deg) | 0.5311 | 0.0008 | -270.1 | FR_Tan ±360° s15.16 full; sat at poles |
+| tan (rad) | 13.4069 | 0.0029 | -4.713 | fr_tan ±2π r16 full; sat at poles |
+| asin / acos | 0.8743 | 0.0301 | 0.0123 | 65536-pt; sqrt approx near boundary |
+| atan2 | 0.5100 | 0.0237 | -2.571 | 65536x5 radii; asin/acos+hypot_fast8 |
+| atan | 0.3390 | 0.0154 | -0.018 | 20001-pt full sweep [-10,10]; via FR_atan2 |
+| sqrt | 0.0239 | 0.0000 | 0.0001 | Round-to-nearest |
+| log2 | 0.0286 | 0.0029 | 0.895 | 65-entry mantissa table |
+| pow2 | 0.0019 | 0.0003 | 3.36 | 65-entry fraction table |
+| ln, log10 | 0.0004 | 0.0000 | 50 | Via FR_MULK28 from log2 |
+| exp | 0.0003 | 0.0000 | 3.91 | FR_MULK28 + FR_pow2 |
+| exp_fast | 0.0009 | 0.0001 | 3.92 | Shift-only scaling |
+| pow10 | 0.0007 | 0.0000 | 1.97 | FR_MULK28 + FR_pow2 |
+| pow10_fast | 0.0028 | 0.0002 | 1.99 | Shift-only scaling |
+| hypot (exact) | 0.0000 | 0.0000 | 0 | 64-bit intermediate |
+| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | 1000 | Shift-only, no multiply |
+
+*Relative error; reference clamped to 1% of full-scale output. "Peak at" = input that produced max error.
### What's in the box
@@ -62,8 +63,8 @@ number of fractional bits available. All functions support radix 0 to 30.
|---|---|
| Arithmetic | `FR_ADD`, `FR_SUB`, `FR_DIV`, `FR_DIV32`, `FR_MOD`, `FR_FixMuls`, `FR_FixMulSat`, `FR_CHRDX` |
| Utility | `FR_MIN`, `FR_MAX`, `FR_CLAMP`, `FR_ABS`, `FR_SGN` |
-| Trig (integer deg) | `FR_Sin`, `FR_Cos`, `FR_Tan`, `FR_SinI`, `FR_CosI`, `FR_TanI` |
-| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam`, `fr_sin_deg`, `fr_cos_deg` |
+| Trig (degree) | `fr_sin_deg`, `fr_cos_deg`, `fr_tan_deg`, `FR_SinI`, `FR_CosI`, `FR_TanI` |
+| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam` |
| Inverse trig | `FR_atan`, `FR_atan2`, `FR_asin`, `FR_acos` |
| Log / exp | `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`, `FR_EXP_FAST`, `FR_POW10_FAST`, `FR_MULK28` |
| Roots | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` |
@@ -166,18 +167,23 @@ s32 two = I2FR(2, R); /* 2.0 → raw 131072 */
*
* MixedCase FR_ names are functions — they contain loops, tables, or
* multi-step algorithms where inlining would waste ROM:
- * FR_Cos, FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ...
+ * FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ...
*
- * lowercase fr_ names are v2 functions (radian trig, wave generators,
- * ADSR envelopes):
- * fr_sin, fr_cos, fr_tan, fr_wave_tri, fr_adsr_step ...
+ * lowercase fr_ names are v2 functions (degree/radian/BAM trig, wave
+ * generators, ADSR envelopes):
+ * fr_sin_deg, fr_cos_deg, fr_tan_deg, fr_sin, fr_cos, fr_tan,
+ * fr_wave_tri, fr_adsr_step ...
+ *
+ * Legacy aliases: FR_Cos, FR_Sin, FR_Tan still work — they are
+ * macros that map to fr_cos_deg, fr_sin_deg, fr_tan_deg. New code
+ * should use the fr_ names directly.
*
* Some macros wrap functions: FR_EXP(x,r) scales x then calls
* FR_pow2 — one-liner convenience, heavy lifting in the function.
*/
/* ---- Math functions ---- */
-s32 c45 = FR_Cos(45, 0); /* cos(45°) = 0.7071 */
+s32 c45 = fr_cos_deg(45, 0); /* cos(45°) = 0.7071 */
s32 s30 = fr_sin(FR_numstr("0.5236", R), R); /* sin(0.5236 rad) */
s32 root2 = FR_sqrt(two, R); /* sqrt(2) = 1.4142 */
s32 angle = FR_atan2(I2FR(1,R), I2FR(1,R), R); /* atan2(1,1) rad */
diff --git a/compare_lfm/comparison_results.json b/compare_lfm/comparison_results.json
new file mode 100644
index 0000000..b9c1b3b
--- /dev/null
+++ b/compare_lfm/comparison_results.json
@@ -0,0 +1,479 @@
+{
+ "description": "FR_math vs libfixmath benchmark — both measured against math.h double precision (IEEE 754)",
+ "gold_standard": "math.h IEEE 754 double precision (~15 significant digits)",
+ "fixed_point_format": "Q16.16 (s15.16), 1 LSB = 1.52587890625000e-05",
+ "accuracy_points": 65536,
+ "timing_iterations": 100000,
+ "rel_error_threshold": 0.01,
+ "platform": "macOS ARM (Apple Silicon)",
+ "optimization": "-O2",
+ "results": [
+ {
+ "function": "sin",
+ "double_reference": "std::sin",
+ "sweep": "65536-pt, [-pi, +pi]",
+ "speed": {
+ "fr_math_ns_per_call": 5.6,
+ "libfixmath_ns_per_call": 10.6,
+ "fr_math_speedup": 1.91,
+ "faster": "fr_math"
+ },
+ "accuracy_vs_double": {
+ "fr_math": {
+ "max_abs_error": 7.40851348e-05,
+ "mean_abs_error": 1.88938357e-05,
+ "max_error_lsb": 4.9,
+ "mean_error_lsb": 1.2,
+ "max_rel_error_pct": 0.4816,
+ "mean_rel_error_pct": 0.0081
+ },
+ "libfixmath": {
+ "max_abs_error": 7.74511497e-03,
+ "mean_abs_error": 5.34549003e-04,
+ "max_error_lsb": 507.6,
+ "mean_error_lsb": 35.0,
+ "max_rel_error_pct": 74.5513,
+ "mean_rel_error_pct": 0.6105
+ },
+ "closer_to_double": "fr_math"
+ }
+ },
+ {
+ "function": "cos",
+ "double_reference": "std::cos",
+ "sweep": "65536-pt, [-pi, +pi]",
+ "speed": {
+ "fr_math_ns_per_call": 8.9,
+ "libfixmath_ns_per_call": 13.3,
+ "fr_math_speedup": 1.50,
+ "faster": "fr_math"
+ },
+ "accuracy_vs_double": {
+ "fr_math": {
+ "max_abs_error": 6.76591807e-05,
+ "mean_abs_error": 2.03740409e-05,
+ "max_error_lsb": 4.4,
+ "mean_error_lsb": 1.3,
+ "max_rel_error_pct": 0.3282,
+ "mean_rel_error_pct": 0.0077
+ },
+ "libfixmath": {
+ "max_abs_error": 7.75591931e-03,
+ "mean_abs_error": 5.36939114e-04,
+ "max_error_lsb": 508.3,
+ "mean_error_lsb": 35.2,
+ "max_rel_error_pct": 74.4001,
+ "mean_rel_error_pct": 0.6121
+ },
+ "closer_to_double": "fr_math"
+ }
+ },
+ {
+ "function": "tan",
+ "double_reference": "std::tan",
+ "sweep": "65536-pt, [-1.2, 1.2] rad",
+ "speed": {
+ "fr_math_ns_per_call": 7.1,
+ "libfixmath_ns_per_call": 32.6,
+ "fr_math_speedup": 4.57,
+ "faster": "fr_math"
+ },
+ "accuracy_vs_double": {
+ "fr_math": {
+ "max_abs_error": 1.98158306e-04,
+ "mean_abs_error": 3.37019908e-05,
+ "max_error_lsb": 13.0,
+ "mean_error_lsb": 2.2,
+ "max_rel_error_pct": 0.1551,
+ "mean_rel_error_pct": 0.0055
+ },
+ "libfixmath": {
+ "max_abs_error": 1.82495961e-02,
+ "mean_abs_error": 8.01092905e-04,
+ "max_error_lsb": 1196.0,
+ "mean_error_lsb": 52.5,
+ "max_rel_error_pct": 0.7099,
+ "mean_rel_error_pct": 0.0410
+ },
+ "closer_to_double": "fr_math"
+ },
+ "note": "Skip near pi/2"
+ },
+ {
+ "function": "asin",
+ "double_reference": "std::asin",
+ "sweep": "65536-pt, [-0.999, 0.999]",
+ "speed": {
+ "fr_math_ns_per_call": 9.7,
+ "libfixmath_ns_per_call": 49.5,
+ "fr_math_speedup": 5.11,
+ "faster": "fr_math"
+ },
+ "accuracy_vs_double": {
+ "fr_math": {
+ "max_abs_error": 3.79872462e-04,
+ "mean_abs_error": 1.04994412e-04,
+ "max_error_lsb": 24.9,
+ "mean_error_lsb": 6.9,
+ "max_rel_error_pct": 1.9776,
+ "mean_rel_error_pct": 0.0477
+ },
+ "libfixmath": {
+ "max_abs_error": 1.01788963e-02,
+ "mean_abs_error": 3.64421558e-03,
+ "max_error_lsb": 667.1,
+ "mean_error_lsb": 238.8,
+ "max_rel_error_pct": 20.1233,
+ "mean_rel_error_pct": 2.4452
+ },
+ "closer_to_double": "fr_math"
+ }
+ },
+ {
+ "function": "acos",
+ "double_reference": "std::acos",
+ "sweep": "65536-pt, [-0.999, 0.999]",
+ "speed": {
+ "fr_math_ns_per_call": 8.4,
+ "libfixmath_ns_per_call": 50.7,
+ "fr_math_speedup": 6.03,
+ "faster": "fr_math"
+ },
+ "accuracy_vs_double": {
+ "fr_math": {
+ "max_abs_error": 3.75418007e-04,
+ "mean_abs_error": 1.00708880e-04,
+ "max_error_lsb": 24.6,
+ "mean_error_lsb": 6.6,
+ "max_rel_error_pct": 0.2724,
+ "mean_rel_error_pct": 0.0093
+ },
+ "libfixmath": {
+ "max_abs_error": 1.01897006e-02,
+ "mean_abs_error": 3.64422377e-03,
+ "max_error_lsb": 667.8,
+ "mean_error_lsb": 238.8,
+ "max_rel_error_pct": 15.3142,
+ "mean_rel_error_pct": 0.3475
+ },
+ "closer_to_double": "fr_math"
+ }
+ },
+ {
+ "function": "atan",
+ "double_reference": "std::atan",
+ "sweep": "65536-pt, [-50, 50]",
+ "speed": {
+ "fr_math_ns_per_call": 8.1,
+ "libfixmath_ns_per_call": 11.0,
+ "fr_math_speedup": 1.37,
+ "faster": "fr_math"
+ },
+ "accuracy_vs_double": {
+ "fr_math": {
+ "max_abs_error": 9.14677954e-04,
+ "mean_abs_error": 7.43583969e-05,
+ "max_error_lsb": 59.9,
+ "mean_error_lsb": 4.9,
+ "max_rel_error_pct": 0.2149,
+ "mean_rel_error_pct": 0.0061
+ },
+ "libfixmath": {
+ "max_abs_error": 1.01676134e-02,
+ "mean_abs_error": 6.15802358e-03,
+ "max_error_lsb": 666.3,
+ "mean_error_lsb": 403.6,
+ "max_rel_error_pct": 19.8632,
+ "mean_rel_error_pct": 0.4571
+ },
+ "closer_to_double": "fr_math"
+ }
+ },
+ {
+ "function": "atan2",
+ "double_reference": "std::atan2",
+ "sweep": "65536-pt, 5 radii x 360 deg",
+ "speed": {
+ "fr_math_ns_per_call": 15.9,
+ "libfixmath_ns_per_call": 10.9,
+ "fr_math_speedup": 0.69,
+ "faster": "libfixmath"
+ },
+ "accuracy_vs_double": {
+ "fr_math": {
+ "max_abs_error": 9.53437855e-04,
+ "mean_abs_error": 1.91371871e-04,
+ "max_error_lsb": 62.5,
+ "mean_error_lsb": 12.5,
+ "max_rel_error_pct": 0.4122,
+ "mean_rel_error_pct": 0.0239
+ },
+ "libfixmath": {
+ "max_abs_error": 1.01728729e-02,
+ "mean_abs_error": 3.88005371e-03,
+ "max_error_lsb": 666.7,
+ "mean_error_lsb": 254.3,
+ "max_rel_error_pct": 20.0045,
+ "mean_rel_error_pct": 0.9267
+ },
+ "closer_to_double": "fr_math"
+ },
+ "note": "All 4 quadrants"
+ },
+ {
+ "function": "sqrt",
+ "double_reference": "std::sqrt",
+ "sweep": "65536-pt, [0.01, 100]",
+ "speed": {
+ "fr_math_ns_per_call": 18.6,
+ "libfixmath_ns_per_call": 19.9,
+ "fr_math_speedup": 1.07,
+ "faster": "fr_math"
+ },
+ "accuracy_vs_double": {
+ "fr_math": {
+ "max_abs_error": 7.62924903e-06,
+ "mean_abs_error": 3.80582266e-06,
+ "max_error_lsb": 0.5,
+ "mean_error_lsb": 0.2,
+ "max_rel_error_pct": 0.0062,
+ "mean_rel_error_pct": 0.0001
+ },
+ "libfixmath": {
+ "max_abs_error": 7.62924903e-06,
+ "mean_abs_error": 3.80582266e-06,
+ "max_error_lsb": 0.5,
+ "mean_error_lsb": 0.2,
+ "max_rel_error_pct": 0.0062,
+ "mean_rel_error_pct": 0.0001
+ },
+ "closer_to_double": "tie"
+ }
+ },
+ {
+ "function": "exp",
+ "double_reference": "std::exp",
+ "sweep": "65536-pt, [-5, 5]",
+ "speed": {
+ "fr_math_ns_per_call": 3.0,
+ "libfixmath_ns_per_call": 64.7,
+ "fr_math_speedup": 21.28,
+ "faster": "fr_math"
+ },
+ "accuracy_vs_double": {
+ "fr_math": {
+ "max_abs_error": 3.17909587e-03,
+ "mean_abs_error": 1.03218909e-04,
+ "max_error_lsb": 208.3,
+ "mean_error_lsb": 6.8,
+ "max_rel_error_pct": 0.1486,
+ "mean_rel_error_pct": 0.0078
+ },
+ "libfixmath": {
+ "max_abs_error": 3.30095957e-03,
+ "mean_abs_error": 9.38398029e-05,
+ "max_error_lsb": 216.3,
+ "mean_error_lsb": 6.1,
+ "max_rel_error_pct": 0.0756,
+ "mean_rel_error_pct": 0.0042
+ },
+ "closer_to_double": "fr_math"
+ }
+ },
+ {
+ "function": "ln",
+ "double_reference": "std::log",
+ "sweep": "65536-pt, [0.01, 100]",
+ "speed": {
+ "fr_math_ns_per_call": 9.0,
+ "libfixmath_ns_per_call": 453.2,
+ "fr_math_speedup": 50.53,
+ "faster": "fr_math"
+ },
+ "accuracy_vs_double": {
+ "fr_math": {
+ "max_abs_error": 4.93278555e-05,
+ "mean_abs_error": 1.61117669e-05,
+ "max_error_lsb": 3.2,
+ "mean_error_lsb": 1.1,
+ "max_rel_error_pct": 0.3012,
+ "mean_rel_error_pct": 0.0006
+ },
+ "libfixmath": {
+ "max_abs_error": 3.40447818e-05,
+ "mean_abs_error": 5.14211182e-06,
+ "max_error_lsb": 2.2,
+ "mean_error_lsb": 0.3,
+ "max_rel_error_pct": 0.0557,
+ "mean_rel_error_pct": 0.0002
+ },
+ "closer_to_double": "libfixmath"
+ }
+ },
+ {
+ "function": "log2",
+ "double_reference": "std::log2",
+ "sweep": "65536-pt, [0.01, 100]",
+ "speed": {
+ "fr_math_ns_per_call": 8.5,
+ "libfixmath_ns_per_call": 39.4,
+ "fr_math_speedup": 4.63,
+ "faster": "fr_math"
+ },
+ "accuracy_vs_double": {
+ "fr_math": {
+ "max_abs_error": 6.06739329e-05,
+ "mean_abs_error": 2.30368713e-05,
+ "max_error_lsb": 4.0,
+ "mean_error_lsb": 1.5,
+ "max_rel_error_pct": 0.4945,
+ "mean_rel_error_pct": 0.0006
+ },
+ "libfixmath": {
+ "max_abs_error": 3.56826644e-05,
+ "mean_abs_error": 9.96190621e-06,
+ "max_error_lsb": 2.3,
+ "mean_error_lsb": 0.7,
+ "max_rel_error_pct": 0.1758,
+ "mean_rel_error_pct": 0.0002
+ },
+ "closer_to_double": "libfixmath"
+ }
+ },
+ {
+ "function": "mul",
+ "double_reference": "double a*b",
+ "sweep": "65536-pt, a in [-50,50], b in [-2,2]",
+ "speed": {
+ "fr_math_ns_per_call": 0.9,
+ "libfixmath_ns_per_call": 1.2,
+ "fr_math_speedup": 1.33,
+ "faster": "fr_math"
+ },
+ "accuracy_vs_double": {
+ "fr_math": {
+ "max_abs_error": 7.62939453e-06,
+ "mean_abs_error": 3.81535541e-06,
+ "max_error_lsb": 0.5,
+ "mean_error_lsb": 0.3,
+ "max_rel_error_pct": 0.0692,
+ "mean_rel_error_pct": 0.0004
+ },
+ "libfixmath": {
+ "max_abs_error": 7.62939453e-06,
+ "mean_abs_error": 3.81535541e-06,
+ "max_error_lsb": 0.5,
+ "mean_error_lsb": 0.3,
+ "max_rel_error_pct": 0.0692,
+ "mean_rel_error_pct": 0.0004
+ },
+ "closer_to_double": "tie"
+ }
+ },
+ {
+ "function": "div",
+ "double_reference": "double a/b",
+ "sweep": "65536-pt, a/b in [-50,50]/[0.5,50]",
+ "speed": {
+ "fr_math_ns_per_call": 0.9,
+ "libfixmath_ns_per_call": 5.3,
+ "fr_math_speedup": 6.10,
+ "faster": "fr_math"
+ },
+ "accuracy_vs_double": {
+ "fr_math": {
+ "max_abs_error": 7.62927377e-06,
+ "mean_abs_error": 3.82182808e-06,
+ "max_error_lsb": 0.5,
+ "mean_error_lsb": 0.3,
+ "max_rel_error_pct": 0.0727,
+ "mean_rel_error_pct": 0.0010
+ },
+ "libfixmath": {
+ "max_abs_error": 8.37162948e-06,
+ "mean_abs_error": 3.82625614e-06,
+ "max_error_lsb": 0.5,
+ "mean_error_lsb": 0.3,
+ "max_rel_error_pct": 0.0727,
+ "mean_rel_error_pct": 0.0010
+ },
+ "closer_to_double": "fr_math"
+ },
+ "note": "Both use 64-bit intermediate"
+ },
+ {
+ "function": "hypot",
+ "double_reference": "std::hypot",
+ "sweep": "65536-pt, 5 radii x 360 deg",
+ "speed": {
+ "fr_math_ns_per_call": 19.9
+ },
+ "accuracy_vs_double": {
+ "fr_math": {
+ "max_abs_error": 7.62930188e-06,
+ "mean_abs_error": 3.67171926e-06,
+ "max_error_lsb": 0.5,
+ "mean_error_lsb": 0.2,
+ "max_rel_error_pct": 0.0076,
+ "mean_rel_error_pct": 0.0009
+ }
+ },
+ "note": "FR_math only (libfixmath has no hypot)"
+ },
+ {
+ "function": "hypot_fast8",
+ "double_reference": "std::hypot",
+ "sweep": "65536-pt, 5 radii x 360 deg",
+ "speed": {
+ "fr_math_ns_per_call": 2.6
+ },
+ "accuracy_vs_double": {
+ "fr_math": {
+ "max_abs_error": 1.37244198e+00,
+ "mean_abs_error": 1.13634634e-01,
+ "max_error_lsb": 89944.4,
+ "mean_error_lsb": 7447.2,
+ "max_rel_error_pct": 0.1372,
+ "mean_rel_error_pct": 0.0516
+ }
+ },
+ "note": "FR_math only; shift-only, no multiply"
+ }
+ ],
+ "summary": {
+ "head_to_head_functions": 13,
+ "faster_wins": { "fr_math": 12, "libfixmath": 1 },
+ "accuracy_wins": { "fr_math": 9, "libfixmath": 2, "tie": 2 },
+ "total_functions_tested": 15
+ },
+ "notes": [
+ "All accuracy measured vs IEEE 754 double. Lower = closer to perfect.",
+ "LSB = Q16.16 least-significant-bit = 1.53e-5. Best possible = 0.5 LSB.",
+ "Percent errors skip |ref| < 0.01 to avoid near-zero division spikes.",
+ "Both libraries use Q16.16 (s15.16): 1.0 = 65536.",
+ "FR_math trig: BAM + 129-entry LUT + linear interpolation.",
+ "libfixmath trig: parabolic approximation + 5th-order correction.",
+ "Timing: min of 3 passes x 100000 calls; cache-warm.",
+ "Speedup > 1.0 means FR_math is faster by that factor."
+ ],
+ "compiled_size_note": "Run 'make size' in .compare/ for live numbers. The values below are representative.",
+ "compiled_size": {
+ "compiler": "clang -O2 (macOS ARM)",
+ "fr_math": {
+ "files": "FR_math.c (single file)",
+ "functions": "trig(6), inv-trig(4), log/ln/log10, exp/pow2/pow10, exp_fast/pow10_fast, sqrt, hypot(2), waves(6), ADSR(4), print(4), format",
+ "rom_bytes": 7470,
+ "ram_bss_bytes": 0,
+ "note": "All tables in const ROM. Zero runtime allocation."
+ },
+ "libfixmath": {
+ "files": "fix16.c, fix16_sqrt.c, fix16_exp.c, fix16_trig.c, fix16_str.c, uint32.c, fract32.c",
+ "functions": "trig(6), inv-trig(4), log/log2, exp, sqrt, mul/div, str",
+ "rom_bytes": 4912,
+ "ram_bss_bytes": 114688,
+ "rom_bytes_no_cache": 5476,
+ "ram_bss_bytes_no_cache": 0,
+ "note": "Default mode caches 112 KB of sin/exp LUTs in BSS. FIXMATH_NO_CACHE eliminates RAM but recomputes per call."
+ }
+ }
+}
diff --git a/compare_lfm/comparison_summary.md b/compare_lfm/comparison_summary.md
index 9169c50..e547ce4 100644
--- a/compare_lfm/comparison_summary.md
+++ b/compare_lfm/comparison_summary.md
@@ -23,13 +23,13 @@ All errors measured vs IEEE 754 double. Pct errors skip |ref| < 0.01.
| Function | FR max LSB | FR max %% | FR avg %% | lfm max LSB | lfm max %% | lfm avg %% | Winner |
|----------|----------:|---------:|---------:|----------:|---------:|---------:|--------|
-| sin | 8.8 | 1.0615 | 0.0158 | 507.6 | 74.5513 | 0.6105 | FR |
-| cos | 8.2 | 0.9018 | 0.0161 | 508.3 | 74.4001 | 0.6121 | FR |
-| tan | 55.7 | 1.0080 | 0.0228 | 1196.0 | 0.7099 | 0.0410 | FR |
-| asin | 31.3 | 0.5795 | 0.0134 | 667.1 | 20.1233 | 2.4452 | FR |
-| acos | 31.0 | 0.5194 | 0.0056 | 667.8 | 15.3142 | 0.3475 | FR |
-| atan | 62.7 | 0.2149 | 0.0061 | 666.3 | 19.8632 | 0.4571 | FR |
-| atan2 | 63.6 | 0.4122 | 0.0258 | 666.7 | 20.0045 | 0.9267 | FR |
+| sin | 4.9 | 0.4816 | 0.0081 | 507.6 | 74.5513 | 0.6105 | FR |
+| cos | 4.4 | 0.3282 | 0.0077 | 508.3 | 74.4001 | 0.6121 | FR |
+| tan | 13.0 | 0.1551 | 0.0055 | 1196.0 | 0.7099 | 0.0410 | FR |
+| asin | 24.9 | 1.9776 | 0.0477 | 667.1 | 20.1233 | 2.4452 | FR |
+| acos | 24.6 | 0.2724 | 0.0093 | 667.8 | 15.3142 | 0.3475 | FR |
+| atan | 59.9 | 0.2149 | 0.0061 | 666.3 | 19.8632 | 0.4571 | FR |
+| atan2 | 62.5 | 0.4122 | 0.0239 | 666.7 | 20.0045 | 0.9267 | FR |
| sqrt | 0.5 | 0.0062 | 0.0001 | 0.5 | 0.0062 | 0.0001 | tie |
| exp | 208.3 | 0.1486 | 0.0078 | 216.3 | 0.0756 | 0.0042 | FR |
| ln | 3.2 | 0.3012 | 0.0006 | 2.2 | 0.0557 | 0.0002 | lfm |
@@ -43,21 +43,21 @@ All errors measured vs IEEE 754 double. Pct errors skip |ref| < 0.01.
| Function | FR_math | libfixmath | Speedup | Faster |
|----------|--------:|-----------:|--------:|--------|
-| sin | 2.6 | 20.7 | 7.94x | FR |
-| cos | 4.8 | 18.4 | 3.86x | FR |
-| tan | 6.0 | 41.4 | 6.89x | FR |
-| asin | 11.5 | 53.7 | 4.67x | FR |
-| acos | 8.4 | 50.4 | 5.97x | FR |
-| atan | 8.0 | 11.2 | 1.41x | FR |
-| atan2 | 15.9 | 10.5 | 0.66x | lfm |
-| sqrt | 18.6 | 19.8 | 1.06x | FR |
-| exp | 3.1 | 67.6 | 22.02x | FR |
-| ln | 8.8 | 479.3 | 54.70x | FR |
-| log2 | 8.7 | 39.4 | 4.55x | FR |
+| sin | 5.6 | 10.6 | 1.91x | FR |
+| cos | 8.9 | 13.3 | 1.50x | FR |
+| tan | 7.1 | 32.6 | 4.57x | FR |
+| asin | 9.7 | 49.5 | 5.11x | FR |
+| acos | 8.4 | 50.7 | 6.03x | FR |
+| atan | 8.1 | 11.0 | 1.37x | FR |
+| atan2 | 15.9 | 10.9 | 0.69x | lfm |
+| sqrt | 18.6 | 19.9 | 1.07x | FR |
+| exp | 3.0 | 64.7 | 21.28x | FR |
+| ln | 9.0 | 453.2 | 50.53x | FR |
+| log2 | 8.5 | 39.4 | 4.63x | FR |
| mul | 0.9 | 1.2 | 1.33x | FR |
-| div | 0.9 | 5.2 | 5.98x | FR |
-| hypot | 20.0 | --- | --- | FR only |
-| hypot_fast8 | 2.4 | --- | --- | FR only |
+| div | 0.9 | 5.3 | 6.10x | FR |
+| hypot | 19.9 | --- | --- | FR only |
+| hypot_fast8 | 2.6 | --- | --- | FR only |
### Summary (13 head-to-head functions)
diff --git a/docs/README.md b/docs/README.md
index c88b451..dad53da 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -43,30 +43,32 @@ or any tooling. If you want the browser version, look in
Errors below are measured at Q16.16 (s15.16). All functions accept any
radix — Q16.16 is just the reference point for the table. See the
[TDD report](../build/test_tdd_report.md) for sweeps at radixes 8, 12,
-16, and 24. Percent errors skip expected values near zero (|expected| < 0.01).
-
-
-| Function | Max err (%) | Avg err (%) | Note |
-|---|---:|---:|---|
-| sin/cos (BAM) | 0.1646 | 0.0058 | 65536 BAM; 129-entry quadrant table |
-| sin/cos (deg) | 0.5909 | 0.0091 | 65536-pt deg r7 + specials |
-| sin/cos (rad) | 0.1646 | 0.0059 | 65536-pt rad r16 |
-| tan (BAM) | 0.1704 | 0.0065 | 65536 BAM; 65-entry octant table |
-| tan (deg) | 0.6000 | 0.0140 | 65536-pt deg r7 + specials |
-| tan (rad) | 0.1704 | 0.0065 | 65536-pt rad r16 |
-| asin / acos | 1.9776 | 0.0308 | 65536-pt; sqrt approx near boundary |
-| atan2 | 0.4953 | 0.0238 | 65536x5 radii; asin/acos+hypot_fast8 |
-| atan | 0.2985 | 0.0153 | 20001-pt sweep [-10,10]; via FR_atan2 |
-| sqrt | 0.0003 | 0.0000 | Round-to-nearest |
-| log2 | 0.2479 | 0.0045 | 65-entry mantissa table |
-| pow2 | 0.1373 | 0.0057 | 65-entry fraction table |
-| ln, log10 | 0.0015 | 0.0004 | Via FR_MULK28 from log2 |
-| exp | 0.0719 | 0.0051 | FR_MULK28 + FR_pow2 |
-| exp_fast | 0.0719 | 0.0064 | Shift-only scaling |
-| pow10 | 0.1163 | 0.0075 | FR_MULK28 + FR_pow2 |
-| pow10_fast | 0.1163 | 0.0100 | Shift-only scaling |
-| hypot (exact) | 0.0001 | 0.0000 | 64-bit intermediate |
-| hypot_fast8 (8-seg) | 0.0977 | 0.0508 | Shift-only, no multiply |
+16, and 24.
+
+
+| Function | Max err (%)*| Avg err (%) | Note |
+|---|---:|---:|---|
+| sin/cos (BAM) | 0.4578 | 0.0076 | fr_sin_bam/fr_cos_bam direct; 129-entry table |
+| sin/cos (deg) | 0.4578 | 0.0076 | FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM |
+| sin/cos (rad) | 0.6104 | 0.0085 | fr_sin/fr_cos via fr_rad_to_bam ±2π r16 |
+| tan (BAM) | 0.5823 | 0.0008 | fr_tan_bam 65536-pt full; ±maxint at poles |
+| tan (deg) | 0.5311 | 0.0008 | FR_Tan ±360° s15.16 full; sat at poles |
+| tan (rad) | 13.4069 | 0.0029 | fr_tan ±2π r16 full; sat at poles |
+| asin / acos | 0.8743 | 0.0301 | 65536-pt; sqrt approx near boundary |
+| atan2 | 0.5100 | 0.0237 | 65536x5 radii; asin/acos+hypot_fast8 |
+| atan | 0.3390 | 0.0154 | 20001-pt full sweep [-10,10]; via FR_atan2 |
+| sqrt | 0.0239 | 0.0000 | Round-to-nearest |
+| log2 | 0.0286 | 0.0029 | 65-entry mantissa table |
+| pow2 | 0.0019 | 0.0003 | 65-entry fraction table |
+| ln, log10 | 0.0004 | 0.0000 | Via FR_MULK28 from log2 |
+| exp | 0.0003 | 0.0000 | FR_MULK28 + FR_pow2 |
+| exp_fast | 0.0009 | 0.0001 | Shift-only scaling |
+| pow10 | 0.0007 | 0.0000 | FR_MULK28 + FR_pow2 |
+| pow10_fast | 0.0028 | 0.0002 | Shift-only scaling |
+| hypot (exact) | 0.0000 | 0.0000 | 64-bit intermediate |
+| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply |
+
+*Relative error; reference clamped to 1% of full-scale output.
## What's in the box
@@ -75,8 +77,8 @@ radix — Q16.16 is just the reference point for the table. See the
| --- | --- |
| Arithmetic | `FR_ADD`, `FR_SUB`, `FR_DIV`, `FR_DIV32`, `FR_MOD`, `FR_FixMuls`, `FR_FixMulSat`, `FR_CHRDX` |
| Utility | `FR_MIN`, `FR_MAX`, `FR_CLAMP`, `FR_ABS`, `FR_SGN` |
-| Trig (integer deg) | `FR_Sin`, `FR_Cos`, `FR_Tan`, `FR_SinI`, `FR_CosI`, `FR_TanI` |
-| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam`, `fr_sin_deg`, `fr_cos_deg` |
+| Trig (degree) | `fr_sin_deg`, `fr_cos_deg`, `fr_tan_deg`, `FR_SinI`, `FR_CosI`, `FR_TanI` |
+| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam` |
| Inverse trig | `FR_atan`, `FR_atan2`, `FR_asin`, `FR_acos` |
| Log / exp | `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`, `FR_EXP_FAST`, `FR_POW10_FAST`, `FR_MULK28` |
| Roots | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` |
@@ -167,18 +169,23 @@ s32 two = I2FR(2, R); /* 2.0 → raw 131072 */
*
* MixedCase FR_ names are functions — they contain loops, tables, or
* multi-step algorithms where inlining would waste ROM:
- * FR_Cos, FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ...
+ * FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ...
*
- * lowercase fr_ names are v2 functions (radian trig, wave generators,
- * ADSR envelopes):
- * fr_sin, fr_cos, fr_tan, fr_wave_tri, fr_adsr_step ...
+ * lowercase fr_ names are v2 functions (degree/radian/BAM trig, wave
+ * generators, ADSR envelopes):
+ * fr_sin_deg, fr_cos_deg, fr_tan_deg, fr_sin, fr_cos, fr_tan,
+ * fr_wave_tri, fr_adsr_step ...
+ *
+ * Legacy aliases: FR_Cos, FR_Sin, FR_Tan still work — they are
+ * macros that map to fr_cos_deg, fr_sin_deg, fr_tan_deg. New code
+ * should use the fr_ names directly.
*
* Some macros wrap functions: FR_EXP(x,r) scales x then calls
* FR_pow2 — one-liner convenience, heavy lifting in the function.
*/
/* ---- Math functions ---- */
-s32 c45 = FR_Cos(45, 0); /* cos(45°) = 0.7071 */
+s32 c45 = fr_cos_deg(45, 0); /* cos(45°) = 0.7071 */
s32 s30 = fr_sin(FR_numstr("0.5236", R), R); /* sin(0.5236 rad) */
s32 root2 = FR_sqrt(two, R); /* sqrt(2) = 1.4142 */
s32 angle = FR_atan2(I2FR(1,R), I2FR(1,R), R); /* atan2(1,1) rad */
diff --git a/docs/api-reference.md b/docs/api-reference.md
index 1ec3742..79a1cd9 100644
--- a/docs/api-reference.md
+++ b/docs/api-reference.md
@@ -291,7 +291,7 @@ bits = 16. Going wider would only add noise, not precision.
"But what if I want to pass in any signed angle without worrying
about conversion?" That is exactly what `FR_CosI(deg)`,
-`FR_Cos(deg, radix)`, and `fr_cos(rad, radix)` are for. All three
+`fr_cos_deg(deg, radix)`, and `fr_cos(rad, radix)` are for. All three
take *signed* inputs and reduce them to BAM for you. The only place
you actually see a `u16` is at the internal `fr_cos_bam` /
`fr_sin_bam` boundary, which you only call by hand if you *want*
@@ -405,35 +405,35 @@ represents exactly 1.0 in the s15.16 output format.
| `fr_sin` | `s32 fr_sin(s32 rad, u16 radix)` | Same convention. |
| `fr_tan` | `s32 fr_tan(s32 rad, u16 radix)` | Returns at **radix 16** (`FR_TRIG_OUT_PREC`). Computed as `(sin << 16) / cos`; saturates to `±INT32_MAX` (`FR_TRIG_MAXVAL`) near π/2 + kπ where cos → 0. |
-### Integer-degree wrappers (legacy API)
+### Degree wrappers (current and legacy)
-The uppercase legacy API takes an angle in degrees.
-`FR_SinI`, `FR_CosI` and `FR_TanI`
-take plain integer degrees — the trailing *I* denotes
-*integer*. The variants *without* the `I`
-suffix (`FR_Sin`, `FR_Cos`, `FR_Tan`)
-accept a `radix` argument and treat the degree value as
-*fixed-point*, so you can pass fractional degrees like
-42.375°.
+The primary degree-based API uses lowercase `fr_` names.
+These are functions (not macros) that take a degree value as
+fixed-point at a caller-chosen radix:
-| Symbol | Signature | Kind |
+| Function | Signature | Notes |
| --- | --- | --- |
-| `FR_SinI` | `FR_SinI(deg)` → `s32` (s15.16) | Macro: `fr_sin_bam(FR_DEG2BAM(deg))`. Zero-cost inline. |
-| `FR_CosI` | `FR_CosI(deg)` → `s32` (s15.16) | Macro: `fr_cos_bam(FR_DEG2BAM(deg))`. |
-| `FR_TanI` | `s32 FR_TanI(s16 deg)` | Function. Returns at radix 16; saturates to `±INT32_MAX` near 90° / 270°. |
-| `FR_Sin` | `s32 FR_Sin(s16 deg, u16 radix)` | `deg` is fixed-point at `radix`. Returns s15.16. |
-| `FR_Cos` | `s32 FR_Cos(s16 deg, u16 radix)` | Same. |
-| `FR_Tan` | `s32 FR_Tan(s16 deg, u16 radix)` | Returns at radix 16; saturates to `±INT32_MAX` near 90° / 270°. |
+| `fr_sin_deg` | `s32 fr_sin_deg(s32 deg, u16 radix)` | `deg` is fixed-point degrees at `radix`. Returns s15.16. |
+| `fr_cos_deg` | `s32 fr_cos_deg(s32 deg, u16 radix)` | Same. |
+| `fr_tan_deg` | `s32 fr_tan_deg(s32 deg, u16 radix)` | Returns at radix 16; saturates to `±INT32_MAX` near 90° / 270°. |
-### Degree wrappers on the BAM path
+Pass `radix = 0` for plain integer degrees, or a higher radix
+for fractional degrees (e.g. 42.375° at radix 4).
-If you're using the lowercase family and want to skip the
-radix entirely, two convenience macros cover pure integer degrees:
+**Integer-degree wrappers** (`FR_SinI`, `FR_CosI`, `FR_TanI`)
+take plain integer degrees -- the trailing *I* denotes
+*integer*. These remain unchanged:
-| Macro | Expansion |
-| --- | --- |
-| `fr_cos_deg(deg)` | `fr_cos_bam(FR_DEG2BAM(deg))` |
-| `fr_sin_deg(deg)` | `fr_sin_bam(FR_DEG2BAM(deg))` |
+| Symbol | Signature | Kind |
+| --- | --- | --- |
+| `FR_SinI` | `FR_SinI(deg)` -> `s32` (s15.16) | Macro: `fr_sin_bam(FR_DEG2BAM(deg))`. Zero-cost inline. |
+| `FR_CosI` | `FR_CosI(deg)` -> `s32` (s15.16) | Macro: `fr_cos_bam(FR_DEG2BAM(deg))`. |
+| `FR_TanI` | `s32 FR_TanI(s16 deg)` | Function. Returns at radix 16; saturates to `±INT32_MAX` near 90° / 270°. |
+
+**Legacy aliases.** The uppercase `FR_Sin`, `FR_Cos`, and
+`FR_Tan` macros still work -- they map directly to
+`fr_sin_deg`, `fr_cos_deg`, and `fr_tan_deg` respectively.
+New code should use the `fr_` names.
## Inverse trigonometry
diff --git a/docs/building.md b/docs/building.md
index da61b4f..4a17c1a 100644
--- a/docs/building.md
+++ b/docs/building.md
@@ -96,7 +96,7 @@ binaries to keep compile times low:
| Binary | What it checks |
| --- | --- |
| `test_basic` | Radix conversions, `FR_ADD`, `FR_FixMuls`, rounding. |
-| `test_trig` | Integer-degree trig (`FR_Sin` et al.). |
+| `test_trig` | Integer-degree trig (`fr_sin_deg` et al.). |
| `test_trig_radians` | Radian / BAM trig and the v2 `fr_sin` API. |
| `test_log_exp` | Log base 2 / ln / log10 and their inverses. |
| `test_2d` | 2D transforms, determinants, inverses. |
diff --git a/docs/examples.md b/docs/examples.md
index 1716efd..d07a477 100644
--- a/docs/examples.md
+++ b/docs/examples.md
@@ -57,15 +57,15 @@ int main(void)
## 2. Trig — integer degrees vs radian vs BAM
FR_Math supports three angle conventions and this example hits
-all three: integer degrees through the legacy
-`FR_Sin` / `FR_Cos` API, the radian-native
+all three: fixed-point degrees through the
+`fr_sin_deg` / `fr_cos_deg` API, the radian-native
`fr_sin` / `fr_cos` (radian at a chosen
input radix), and BAM-native `fr_sin_bam` /
`fr_cos_bam`. All three paths feed the same 129-entry
quadrant cosine table under the hood and should produce nearly
identical results.
-*Caveats:* the `radix` parameter on `FR_Sin(deg, radix)` is
+*Caveats:* the `radix` parameter on `fr_sin_deg(deg, radix)` is
the radix of the *degree input*, not the output. All sin/cos
functions return **s15.16** — that is, `s32` at radix 16,
where 1.0 = 65536 (`FR_TRIG_ONE`). The values compared below
diff --git a/docs/fixed-point-primer.md b/docs/fixed-point-primer.md
index ab79007..489a127 100644
--- a/docs/fixed-point-primer.md
+++ b/docs/fixed-point-primer.md
@@ -862,8 +862,8 @@ generation of each symbol:
| Prefix | What it is | Example |
| --- | --- | --- |
| `FR_XXX()` | `UPPERCASE` macro — inline, zero call overhead. | `FR_ADD`, `FR_ABS`, `FR2I` |
-| `FR_Xxx()` | Mixed-case C function — the classic v1 API. Integer-degree trig and related. | `FR_Sin`, `FR_log2`, `FR_sqrt` |
-| `fr_xxx()` | Lowercase C function — v2 additions (radian / BAM trig, wave generators, ADSR). | `fr_sin`, `fr_wave_tri`, `fr_adsr_step` |
+| `FR_Xxx()` | Mixed-case C function — the classic v1 API. Log, sqrt, inverse trig, and related. | `FR_log2`, `FR_sqrt`, `FR_atan2` |
+| `fr_xxx()` | Lowercase C function — v2 API (degree/radian/BAM trig, wave generators, ADSR). `fr_sin_deg`, `fr_cos_deg`, `fr_tan_deg` are the current degree-based trig names. `FR_Sin`/`FR_Cos`/`FR_Tan` remain as legacy aliases. | `fr_sin_deg`, `fr_sin`, `fr_wave_tri`, `fr_adsr_step` |
| `s8, s16, s32` | Signed integer typedefs (aliases for `int8_t`, `int16_t`, `int32_t`). | — |
| `u8, u16, u32` | Unsigned integer typedefs. | — |
@@ -923,7 +923,7 @@ explicitly mask off the top bits (and handle negative values)
before the quadrant extraction (`bam >> 14`) made any sense. You
would have traded one free operation for two slow ones on every
sample, just to get back the same behavior. So instead, the public
-trig entry points (`FR_CosI`, `FR_Cos`, `fr_cos`, and friends)
+trig entry points (`FR_CosI`, `fr_cos_deg`, `fr_cos`, and friends)
*all* take signed angles — in degrees, fixed-radix degrees, or
radians — and only the internal `fr_cos_bam` / `fr_sin_bam`
primitives see the `u16`. In practice you will never construct a
diff --git a/keywords.txt b/keywords.txt
index 1ab2703..f7e784e 100644
--- a/keywords.txt
+++ b/keywords.txt
@@ -14,9 +14,9 @@ fr_adsr_t KEYWORD1
FR_FixMuls KEYWORD2
FR_FixMulSat KEYWORD2
FR_FixAddSat KEYWORD2
-FR_Cos KEYWORD2
-FR_Sin KEYWORD2
-FR_Tan KEYWORD2
+fr_sin_deg KEYWORD2
+fr_cos_deg KEYWORD2
+fr_tan_deg KEYWORD2
FR_TanI KEYWORD2
FR_acos KEYWORD2
FR_asin KEYWORD2
@@ -99,3 +99,6 @@ FR_SLOG2E LITERAL1
FR_SrLOG2E LITERAL1
FR_SLOG2_10 LITERAL1
FR_SrLOG2_10 LITERAL1
+FR_Cos LITERAL1
+FR_Sin LITERAL1
+FR_Tan LITERAL1
diff --git a/makefile b/makefile
index eb8a9fc..83a4e64 100644
--- a/makefile
+++ b/makefile
@@ -57,6 +57,10 @@ help:
@echo " size-report Multi-architecture size report"
@echo " size-simple Size report for current platform"
@echo ""
+ @echo "Tools:"
+ @echo " tools Build diagnostic tools"
+ @echo " trig-neighborhood Build function neighborhood explorer"
+ @echo ""
@echo "Maintenance:"
@echo " clean Remove build artifacts"
@echo " cleanall Remove build artifacts and backups"
@@ -211,6 +215,19 @@ size-simple: lib
ls -lh $(BUILD_DIR)/*.o; \
fi
+# Tools
+TOOLS_DIR = tools
+
+.PHONY: tools
+tools: dirs trig-neighborhood
+
+.PHONY: trig-neighborhood
+trig-neighborhood: dirs $(BUILD_DIR)/trig_neighborhood # dirs first, as in 'tools', so the target also works when invoked on its own
+
+$(BUILD_DIR)/trig_neighborhood: $(TOOLS_DIR)/trig_neighborhood.cpp $(SRC_DIR)/FR_math.c $(HEADERS)
+ $(CC) -I$(SRC_DIR) $(LIB_WARN) -Os -c $(SRC_DIR)/FR_math.c -o $(BUILD_DIR)/tool_FR_math.o
+ $(CXX) $(CXXFLAGS) $(TOOLS_DIR)/trig_neighborhood.cpp $(BUILD_DIR)/tool_FR_math.o $(LDFLAGS) -o $@
+
# Clean
.PHONY: clean
clean:
diff --git a/pages/guide/api-reference.html b/pages/guide/api-reference.html
index 328d2f4..3e8a17f 100644
--- a/pages/guide/api-reference.html
+++ b/pages/guide/api-reference.html
@@ -507,7 +507,7 @@
Why u16 for BAM (not s32)?
“But what if I want to pass in any signed angle without
worrying about conversion?” That is exactly what
-FR_CosI(deg), FR_Cos(deg, radix), and
+FR_CosI(deg), fr_cos_deg(deg, radix), and
fr_cos(rad, radix) are for. All three take
signed inputs and reduce them to BAM for you. The only
place you actually see a u16 is at the internal
@@ -628,16 +628,18 @@
Radian-native
-
Integer-degree wrappers (legacy API)
+
Degree wrappers
-
The uppercase legacy API takes an angle in degrees.
+
The degree API takes an angle in degrees.
FR_SinI, FR_CosI and FR_TanI
take plain integer degrees — the trailing I denotes
-integer. The variants without the I
-suffix (FR_Sin, FR_Cos, FR_Tan)
-accept a radix argument and treat the degree value as
-fixed-point, so you can pass fractional degrees like
-42.375°.
+integer. The primary degree wrappers with a
+radix argument are fr_sin_deg,
+fr_cos_deg, and fr_tan_deg — they
+treat the degree value as fixed-point, so you can pass
+fractional degrees like 42.375°. The uppercase names
+FR_Sin, FR_Cos, and FR_Tan
+are legacy aliases that map to the same functions.
Function. Returns at radix 16; saturates to ±INT32_MAX near 90° / 270°.
-
FR_Sin
s32 FR_Sin(s16 deg, u16 radix)
deg is fixed-point at radix. Returns s15.16.
-
FR_Cos
s32 FR_Cos(s16 deg, u16 radix)
Same.
-
FR_Tan
s32 FR_Tan(s16 deg, u16 radix)
Returns at radix 16; saturates to ±INT32_MAX near 90° / 270°.
+
fr_sin_deg
s32 fr_sin_deg(s32 deg, u16 radix)
Function. deg is fixed-point at radix. Returns s15.16.
+
fr_cos_deg
s32 fr_cos_deg(s32 deg, u16 radix)
Function. Same.
+
fr_tan_deg
s32 fr_tan_deg(s32 deg, u16 radix)
Function. Returns at radix 16; saturates to ±INT32_MAX near 90° / 270°.
+
FR_Sin
FR_Sin(deg, radix)
Legacy macro alias for fr_sin_deg.
+
FR_Cos
FR_Cos(deg, radix)
Legacy macro alias for fr_cos_deg.
+
FR_Tan
FR_Tan(deg, radix)
Legacy macro alias for fr_tan_deg.
-
Degree wrappers on the BAM path
+
Degree-to-BAM path
-
If you’re using the lowercase family and want to skip the
-radix entirely, two convenience macros cover pure integer degrees:
-
-
-
Macro
Expansion
-
-
fr_cos_deg(deg)
fr_cos_bam(FR_DEG2BAM(deg))
-
fr_sin_deg(deg)
fr_sin_bam(FR_DEG2BAM(deg))
-
-
+
fr_cos_deg, fr_sin_deg, and
+fr_tan_deg are now functions (not macros). They accept
+a fixed-point degree value with a radix argument,
+convert to BAM internally, and call the BAM core. For plain integer
+degrees with no radix parameter, use FR_CosI /
+FR_SinI / FR_TanI instead.
FR_Math supports three angle conventions and this example hits
-all three: integer degrees through the legacy
-FR_Sin / FR_Cos API, the radian-native
+all three: fixed-point degrees through
+fr_sin_deg / fr_cos_deg (or the legacy
+aliases FR_Sin / FR_Cos), the radian-native
fr_sin / fr_cos (radian at a chosen
input radix), and BAM-native fr_sin_bam /
fr_cos_bam. All three paths feed the same 129-entry
@@ -79,7 +80,7 @@
2. Trig — integer degrees vs radian vs BAM
identical results.
Caveats: the radix parameter on
-FR_Sin(deg, radix) is the radix of the degree
+fr_sin_deg(deg, radix) is the radix of the degree
input, not the output. All sin/cos functions return
s15.16 — that is, s32 at radix 16,
where 1.0 = 65536 (FR_TRIG_ONE). The values compared
diff --git a/pages/guide/fixed-point-primer.html b/pages/guide/fixed-point-primer.html
index ce41074..73325ef 100644
--- a/pages/guide/fixed-point-primer.html
+++ b/pages/guide/fixed-point-primer.html
@@ -911,8 +911,8 @@
FR_Math’s naming conventions
Prefix
What it is
Example
FR_XXX()
UPPERCASE macro — inline, zero call overhead.
FR_ADD, FR_ABS, FR2I
-
FR_Xxx()
Mixed-case C function — the classic v1 API. Integer-degree trig and related.
FR_Sin, FR_log2, FR_sqrt
-
fr_xxx()
Lowercase C function — v2 additions (radian / BAM trig, wave generators, ADSR).
fr_sin, fr_wave_tri, fr_adsr_step
+
FR_Xxx()
Mixed-case C function or legacy alias. FR_Sin/FR_Cos/FR_Tan are legacy aliases for fr_sin_deg/fr_cos_deg/fr_tan_deg.
FR_log2, FR_sqrt, FR_Sin (legacy)
+
fr_xxx()
Lowercase C function — the current API for degree wrappers, radian / BAM trig, wave generators, ADSR.
Signed integer typedefs (aliases for int8_t, int16_t, int32_t).
—
u8, u16, u32
Unsigned integer typedefs.
—
@@ -980,7 +980,7 @@
Angle representations
(bam >> 14) made any sense. You would have traded
one free operation for two slow ones on every sample, just to get
back the same behavior. So instead, the public trig entry points
-(FR_CosI, FR_Cos, fr_cos, and
+(FR_CosI, fr_cos_deg, fr_cos, and
friends) all take signed angles — in degrees,
fixed-radix degrees, or radians — and only the internal
fr_cos_bam / fr_sin_bam primitives see
diff --git a/pages/index.html b/pages/index.html
index 62e6b7a..0040cd4 100644
--- a/pages/index.html
+++ b/pages/index.html
@@ -47,34 +47,34 @@
Measured accuracy
Errors below are measured at Q16.16 (s15.16). All functions accept any
radix — Q16.16 is just the reference point for the table.
See the TDD
-report for sweeps at radixes 8, 12, 16, and 24.
-Percent errors skip expected values near zero (|expected| < 0.01).
-
-
-
-
Function
Max err (%)
Avg err (%)
Note
-
-
sin/cos (BAM)
0.1646
0.0058
65536 BAM; 129-entry quadrant table
-
sin/cos (deg)
0.5909
0.0091
65536-pt deg r7 + specials
-
sin/cos (rad)
0.1646
0.0059
65536-pt rad r16
-
tan (BAM)
0.1704
0.0065
65536 BAM; 65-entry octant table
-
tan (deg)
0.6000
0.0140
65536-pt deg r7 + specials
-
tan (rad)
0.1704
0.0065
65536-pt rad r16
-
asin / acos
1.9776
0.0308
65536-pt; sqrt approx near boundary
-
atan2
0.4953
0.0238
65536x5 radii; asin/acos+hypot_fast8
-
atan
0.2985
0.0153
20001-pt sweep [-10,10]; via FR_atan2
-
sqrt
0.0003
0.0000
Round-to-nearest
-
log2
0.2479
0.0045
65-entry mantissa table
-
pow2
0.1373
0.0057
65-entry fraction table
-
ln, log10
0.0015
0.0004
Via FR_MULK28 from log2
-
exp
0.0719
0.0051
FR_MULK28 + FR_pow2
-
exp_fast
0.0719
0.0064
Shift-only scaling
-
pow10
0.1163
0.0075
FR_MULK28 + FR_pow2
-
pow10_fast
0.1163
0.0100
Shift-only scaling
-
hypot (exact)
0.0001
0.0000
64-bit intermediate
-
hypot_fast8 (8-seg)
0.0977
0.0508
Shift-only, no multiply
-
-
+report for sweeps at radixes 8, 12, 16, and 24.
+
+
+
+
Function
Max err (%)*
Avg err (%)
Note
+
+
sin/cos (BAM)
0.4578
0.0076
fr_sin_bam/fr_cos_bam direct; 129-entry table
+
sin/cos (deg)
0.4578
0.0076
FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM
+
sin/cos (rad)
0.6104
0.0085
fr_sin/fr_cos via fr_rad_to_bam ±2π r16
+
tan (BAM)
0.5823
0.0008
fr_tan_bam 65536-pt full; ±maxint at poles
+
tan (deg)
0.5311
0.0008
FR_Tan ±360° s15.16 full; sat at poles
+
tan (rad)
13.4069
0.0029
fr_tan ±2π r16 full; sat at poles
+
asin / acos
0.8743
0.0301
65536-pt; sqrt approx near boundary
+
atan2
0.5100
0.0237
65536x5 radii; asin/acos+hypot_fast8
+
atan
0.3390
0.0154
20001-pt full sweep [-10,10]; via FR_atan2
+
sqrt
0.0239
0.0000
Round-to-nearest
+
log2
0.0286
0.0029
65-entry mantissa table
+
pow2
0.0019
0.0003
65-entry fraction table
+
ln, log10
0.0004
0.0000
Via FR_MULK28 from log2
+
exp
0.0003
0.0000
FR_MULK28 + FR_pow2
+
exp_fast
0.0009
0.0001
Shift-only scaling
+
pow10
0.0007
0.0000
FR_MULK28 + FR_pow2
+
pow10_fast
0.0028
0.0002
Shift-only scaling
+
hypot (exact)
0.0000
0.0000
64-bit intermediate
+
hypot_fast8 (8-seg)
0.0915
0.0320
Shift-only, no multiply
+
+
+
*Relative error; reference clamped to 1% of full-scale output.
*Relative error; reference clamped to 1% of full-scale output.
"$'\n'
replacement+=""
perl -0777 -i -pe "
diff --git a/src/FR_math.c b/src/FR_math.c
index 3a7a1d4..45c75c2 100644
--- a/src/FR_math.c
+++ b/src/FR_math.c
@@ -37,85 +37,208 @@
#endif
/*=======================================================
- * BAM-native trig: fr_cos_bam, fr_sin_bam, fr_cos, fr_sin, fr_tan
+ * Full-precision radian/degree → BAM conversion helpers
+ *
+ * rad_to_bam_full(r) returns a full s32 BAM value where:
+ * upper 16 bits = integer BAM (the u16 table index)
+ * lower 16 bits = sub-BAM fractional part
+ * Input r must already be normalized to radix 16 and reduced to [-pi, pi].
+ *
+ * The shift terms match FR_RAD2BAM (10 terms, ~21-bit accuracy) but are
+ * reordered so intermediate sums stay within s32 for |r| <= pi at r16.
+ */
+static s32 rad_to_bam_full(s32 r)
+{
+ /* Multiply r by 65536/(2*pi) ≈ 10430.37835 using 10 shift-only terms:
+ * 2^13 + 2^11 + 2^7 + 2^6 - 2 + 0.5 - 0.125 + 2^-8 - 2^-11 - 2^-14
+ * = 10430.378357 (~21-bit accuracy).
+ * Negative terms are interleaved early so every partial sum stays within
+ * s32 for |r| <= pi at r16 (result peaks at ≈ 2^31 - 4K, just under INT32_MAX). */
+ return (r<<13)-(r<<1)+(r<<11)-(r>>3)+(r<<7)+(r<<6)+(r>>1)+(r>>8)-(r>>11)-(r>>14);
+}
+
+/* deg_to_bam_full(d) — shift-only multiply of degrees by 65536/360 ≈ 182.0444.
+ * Input d must already be normalized to radix 16 and reduced to [-90, 90).
+ * Returns full s32 BAM (upper 16 = integer BAM, lower 16 = sub-BAM).
+ * 7 terms: 2^7 + 2^6 - 2^3 - 2 + 2^-5 + 2^-6 - 2^-9 = 182.044922; ~18-bit accuracy matching FR_DEG2BAM. */
+static s32 deg_to_bam_full(s32 d)
+{
+ return (d<<7)+(d<<6)-(d<<3)-(d<<1)+(d>>5)+(d>>6)-(d>>9);
+}
+
+/* Normalize a fixed-radix value to radix 16: right shift (low bits dropped, no rounding) for radix > 16, left shift (no overflow check) for radix < 16. */
+static s32 normalize_to_r16(s32 val, u16 radix)
+{
+ return (radix > 16) ? (val >> (radix - 16)) /* finer than r16: discard the extra fraction bits */
+ : (radix < 16) ? (val << (16 - radix)) /* coarser than r16: scale up */
+ : val; /* already r16 */
+}
+
+/* Reduce non-negative radian (at r16) to [0, 2*pi]; the tests are strict '>', so an input of exactly 2*pi or 4*pi returns 2*pi, not 0.
+ * Helper used by range_reduce_rad and the near-pi small-angle paths. */
+static s32 reduce_to_2pi(s32 r)
+{
+ const s32 two_pi = FR_TWO_PI(16); /* 411775 */
+ if (r > (two_pi << 1)) /* beyond 4*pi: one divide removes all whole turns */
+ r -= (r / two_pi) * two_pi;
+ else if (r > two_pi) /* (2*pi, 4*pi]: a single subtraction is enough */
+ r -= two_pi;
+ return r;
+}
+
+/* Range-reduce radian value (at r16, non-negative) to [-pi, pi].
+ * Caller guarantees r >= 0 (sign is handled externally). */
+static s32 range_reduce_rad(s32 r)
+{
+ r = reduce_to_2pi(r); /* first bring r into a single turn */
+ if (r > FR_PI(16)) /* upper half-turn: use the equivalent negative angle */
+ r -= FR_TWO_PI(16);
+ return r;
+}
+
+/* fr_rad_to_bam — overflow-safe radian to u16 BAM conversion.
+ * Normalizes to r16, reduces via positive-only path, applies shift-only multiply.
+ * Handles inputs beyond ±2*pi with modulus (slow path). Returns the nearest integer BAM (round-to-nearest on the sub-BAM fraction). */
+u16 fr_rad_to_bam(s32 rad, u16 radix)
+{
+ s32 r = normalize_to_r16(rad, radix);
+ /* BAM wraps naturally in u16, but range_reduce expects non-negative.
+ * For negative r: bam(-x) = -bam(x) mod 65536, so negate and let u16 wrap. */
+ s32 sign = 1;
+ if (r < 0) { r = -r; sign = -1; }
+ r = range_reduce_rad(r);
+ s32 bam_full = rad_to_bam_full(r); /* upper 16 bits = integer BAM, lower 16 = fraction */
+ if (sign < 0) bam_full = -bam_full;
+ return (u16)(((u32)bam_full + 0x8000u) >> 16); /* round in u32: near pi bam_full is within ~4K of INT32_MAX, so a signed +0x8000 would overflow (UB) */
+}
+
+/* fr_deg_to_bam — overflow-safe degree to u16 BAM conversion.
+ * Normalizes to r16, reduces to [-90, 90) with quadrant offset, then rounds to the nearest integer BAM. */
+u16 fr_deg_to_bam(s32 deg, u16 radix)
+{
+ s32 d = normalize_to_r16(deg, radix);
+
+ /* Reduce to [-180, 180) */
+ if (d >= FR_D360_R16 || d < -FR_D360_R16) { /* more than one full turn away: take the remainder */
+ s32 n = d / FR_D360_R16; /* integer division truncates toward zero, so d keeps its sign */
+ d -= n * FR_D360_R16;
+ }
+ if (d >= FR_D180_R16) d -= FR_D360_R16;
+ if (d < -FR_D180_R16) d += FR_D360_R16;
+
+ /* Reduce to [-90, 90) with BAM quadrant offset */
+ u16 offset = 0;
+ if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = 32768; } /* 32768 BAM = half a turn, compensating the 180° shift */
+ else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = 32768; }
+
+ return (u16)(offset + (u16)((deg_to_bam_full(d) + (1 << 15)) >> 16)); /* +(1 << 15) rounds the sub-BAM fraction; the u16 sum wraps mod 65536 */
+}
+
+/*=======================================================
+ * BAM-native trig: fr_sin_bam, fr_cos_bam, fr_cos, fr_sin, fr_tan
*
* Internal model: every angle is reduced to a u16 BAM value. The top 2 bits
* select the quadrant, the bottom 14 bits are the in-quadrant position. Odd
* quadrants (1, 3) reverse the in-quadrant index so the table is always read
- * in the same direction. Quadrants 1 and 2 get their sign flipped at the
- * end.
- *
- * Within each quadrant, the upper FR_TRIG_TABLE_BITS bits of the
- * in-quadrant value index the table; the lower FR_TRIG_FRAC_BITS bits drive
- * round-to-nearest linear interpolation between adjacent table entries.
- *
- * The last entry (table[FR_TRIG_TABLE_SIZE-1] = 0) means the
- * interpolation at the very edge of the quadrant never reads out of bounds.
- *
- * Rounding: we interpolate as
- * v = lo - ((d * frac + HALF) >> FRAC_BITS)
- * where d = lo - hi (which is >= 0 because cos is monotonically decreasing
- * on [0, pi/2]). Using the subtract form guarantees the argument of >> is
- * always non-negative, so the behavior is portable C89 (no reliance on
- * implementation-defined right-shift of negative integers) and the +HALF
- * gives unambiguous round-half-up. Max error vs the true cos is ~1 LSB of
- * s0.15 (~3e-5 absolute); mean error ~0 (no bias).
+ * in the same direction.
+ *
+ * The table is a 129-entry SINE quadrant (ascending: 0 at index 0, 32768 at
+ * index 128). After mirroring, small full_pos → small output (near zero),
+ * which enables a cheap small-angle approximation: sin(θ) ≈ θ for angles
+ * below one table step (~0.7°). This eliminates table quantization error
+ * in the region where it matters most.
+ *
+ * Sign rule: quadrants 2 and 3 negate the result.
+ * Mirror rule: quadrants 1 and 3 flip the in-quadrant position.
*/
-s32 fr_cos_bam(u16 bam)
+s32 fr_sin_bam(u16 bam)
{
- u32 q = ((u32)bam >> 14) & 0x3; /* top 2 bits = quadrant */
- u32 inq = (u32)bam & (FR_TRIG_QUADRANT - 1); /* bottom 14 bits */
- u32 idx, frac;
- s32 lo, hi, d, v;
-
- /* Exact cardinal angles: bam=0 → 1.0, bam=16384 → 0, etc. */
- if (inq == 0)
- {
- if (q == 0) return FR_TRIG_ONE; /* 0° → 1.0 */
- if (q == 2) return -FR_TRIG_ONE; /* 180° → -1.0 */
- return 0; /* 90° or 270° → 0 */
+ u32 q = ((u32)bam >> 14) & 0x3; /* top 2 bits = quadrant */
+ u32 inq = (u32)bam & (FR_TRIG_QUADRANT - 1); /* bottom 14 bits */
+
+ /* Exact cardinal angles (inq == 0): return exact 0 / ±FR_TRIG_ONE without reading the table */
+ if (inq == 0) {
+ if (q == 0 || q == 2) return 0; /* 0° or 180° → 0 */
+ if (q == 1) return FR_TRIG_ONE; /* 90° → 1.0 */
+ return -FR_TRIG_ONE; /* 270° → -1.0 */
}
+ /* Odd quadrants mirror: read table from the far end */
if (q == 1 || q == 3)
- inq = FR_TRIG_QUADRANT - inq; /* mirror across pi/2 */
-
- idx = inq >> FR_TRIG_FRAC_BITS; /* table index [0..SIZE-1] */
- frac = inq & FR_TRIG_FRAC_MASK; /* interp fraction */
- lo = gFR_COS_TAB_Q[idx];
- hi = gFR_COS_TAB_Q[idx + 1];
- d = lo - hi; /* >= 0: cos monotonic */
- v = lo - (((d * (s32)frac) + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS);
-
- if (v < 0x40) {
- /* Near zero crossing: redo interpolation with 3 extra bits of
- * precision to reduce rounding error when the result is small. */
- s32 lo3 = (s32)gFR_COS_TAB_Q[idx] << 3;
- s32 d3 = lo3 - ((s32)gFR_COS_TAB_Q[idx + 1] << 3);
- v = lo3 - (((d3 * (s32)frac) + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS);
- v = (v + 2) >> 2; /* s0.18 → s15.16 with rounding */
+ inq = FR_TRIG_QUADRANT - inq;
+
+ s32 v;
+
+ /* Small-angle approximation: sin(θ) ≈ θ for inq < 128 (one table step).
+ * θ_rad = inq * (π/2) / 16384. Output = θ * 65536 = inq * FR_kQ2RAD / 16384.
+ * Max inq=127: 127 * 102944 / 16384 = 798. Error: θ³/6 < 3e-7 << 1 LSB. */
+ if (inq < FR_TRIG_FRAC_MAX) {
+ v = (s32)(((u32)inq * 102944u + 8192u) >> 14); /* 102944 ≈ (π/2)·65536; +8192 (half of 2^14) rounds to nearest */
} else {
- v <<= 1; /* s0.15 → s15.16 */
+ /* Table lookup with 7-bit interpolation fraction */
+ u32 idx = inq >> FR_TRIG_FRAC_BITS;
+ u32 frac = inq & FR_TRIG_FRAC_MASK;
+ s32 lo = (s32)gFR_SIN_TAB_Q[idx];
+ s32 hi = (s32)gFR_SIN_TAB_Q[idx + 1];
+ v = lo + (((hi - lo) * (s32)frac + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS); /* linear interpolation between adjacent entries; +FR_TRIG_FRAC_HALF rounds */
+ v <<= 1; /* u0.15 → s15.16 */
}
- return (q == 1 || q == 2) ? -v : v;
+ return (q >= 2) ? -v : v; /* quadrants 2 and 3 (180°..360°): sine is negative */
}
-s32 fr_sin_bam(u16 bam)
+s32 fr_cos_bam(u16 bam)
{
- /* sin(x) = cos(x - pi/2) = cos(bam - 16384). The u16 wraparound makes
- * this completely free.
- */
- return fr_cos_bam((u16)(bam - FR_BAM_QUADRANT));
+ /* cos(x) = sin(x + pi/2) = sin(bam + 16384): a quarter turn is 16384 BAM, and the (u16) cast wraps the sum mod 65536 for free. */
+ return fr_sin_bam((u16)(bam + FR_BAM_QUADRANT));
}
s32 fr_cos(s32 rad, u16 radix)
{
+ if (rad == 0) return FR_TRIG_ONE; /* exact cos(0) = 1.0 */
+ s32 r = normalize_to_r16(rad, radix);
+ if (r < 0) r = -r; /* cos is even: work on |angle| */
+ r = reduce_to_2pi(r);
+ /* Near π/2 or 3π/2 (cos=0 crossings): cos(π/2+δ) = -sin(δ) ≈ -δ,
+ * cos(3π/2+δ) = sin(δ) ≈ δ. δ is radians at r16, i.e. already in s15.16 output units. */
+ s32 delta = r - FR_HALF_PI(16);
+ if (delta >= -256 && delta <= 256) /* |δ| <= 256 LSB ≈ 0.0039 rad */
+ return -delta;
+ delta = r - FR_THREE_HALF_PI(16);
+ if (delta >= -256 && delta <= 256)
+ return delta;
return fr_cos_bam(fr_rad_to_bam(rad, radix));
}
s32 fr_sin(s32 rad, u16 radix)
{
- return fr_sin_bam(fr_rad_to_bam(rad, radix));
+ if (rad == 0) return 0; /* exact sin(0) = 0 */
+ s32 r = normalize_to_r16(rad, radix);
+ s32 sign = 1;
+ if (r < 0) { r = -r; sign = -1; } /* sin is odd: work on |angle|, restore the sign on return */
+ r = reduce_to_2pi(r);
+ /* Near 0 after reduction (r < 256 LSB ≈ 0.0039 rad): sin(δ) ≈ δ; r16 radians are already s15.16 output units */
+ if (r < 256) {
+ s32 v = r;
+ return (sign < 0) ? -v : v;
+ }
+ /* Near π: sin(π + δ) = -sin(δ) ≈ -δ */
+ s32 delta = r - FR_PI(16);
+ if (delta >= -256 && delta <= 256) {
+ s32 v = -delta;
+ return (sign < 0) ? -v : v;
+ }
+ /* Near 2π: here δ = 2π - r (distance below 2π), and sin(2π - δ) = -sin(δ) ≈ -δ */
+ delta = FR_TWO_PI(16) - r;
+ if (delta >= 0 && delta < 256) {
+ s32 v = -delta;
+ return (sign < 0) ? -v : v;
+ }
+ /* Main path: reduce to [-π, π], convert to u16 BAM, table lookup */
+ if (r > FR_PI(16)) r -= FR_TWO_PI(16);
+ u16 bam = (u16)((rad_to_bam_full(r) + (1 << 15)) >> 16); /* round to nearest BAM; the band around π was returned above, so this signed add cannot overflow */
+ s32 v = fr_sin_bam(bam);
+ return (sign < 0) ? -v : v;
}
/*=======================================================
@@ -205,59 +328,219 @@ s32 fr_tan_bam(u16 bam)
return (sign < 0) ? -raw : raw;
}
-/* fr_tan: returns tan at s15.16 (radix 16). Uses BAM-native table.
- * At exact poles, fr_tan_bam's sign convention is based on BAM quadrant
- * which loses the original approach direction. Fix up: if the result
- * saturates, the sign should match the sign of the radian input. */
-s32 fr_tan(s32 rad, u16 radix)
+/* fr_tan — radian-input tangent with full sub-BAM precision.
+ *
+ * Goes directly to the 65-entry octant tangent table with 16-bit
+ * interpolation precision. Sign from quadrant, magnitude from table.
+ * No s64 intermediates. One 32-bit division in the second-octant path.
+ *
+ * Architecture:
+ * 1. Sign: determined by quadrant of the BAM position (Q1/Q3=+, Q2/Q4=-)
+ * 2. Magnitude: from octant table lookup + reciprocal identity
+ * - First octant [0,45°): direct table lerp
+ * - Second octant [45°,90°): 1/tan(90°-x) via reciprocal
+ * 3. Return sign * magnitude */
+
+/* Internal: given a full s32 BAM, compute |tan| directly from the table.
+ * Returns the unsigned magnitude (always >= 0). */
+static s32 tan_mag_from_bam_full(s32 bam_full)
{
- s32 result = fr_tan_bam(fr_rad_to_bam(rad, radix));
- if (result == FR_TRIG_MAXVAL && rad < 0)
- return -FR_TRIG_MAXVAL;
- if (result == -FR_TRIG_MAXVAL && rad > 0)
+ u16 bam0 = (u16)(bam_full >> 16);
+ u32 frac_sub = (u32)bam_full & 0xFFFFu;
+
+ u32 q = ((u32)bam0 >> 14) & 0x3u;
+ u32 inq = (u32)bam0 & 0x3FFFu;
+
+ /* Exact zeros: tan(0°) = tan(180°) = 0 */
+ if (inq == 0 && frac_sub == 0 && (q == 0 || q == 2))
+ return 0;
+
+ /* Exact poles: tan(90°) = tan(270°) → saturate */
+ if (inq == 0 && frac_sub == 0 && (q == 1 || q == 3))
return FR_TRIG_MAXVAL;
- return result;
+
+ /* Mirror the odd-indexed quadrants (q == 1 and q == 3, i.e. the 2nd and 4th) into [0, 90°).
+ * After this, full_pos represents distance from the nearest zero. */
+ u32 full_pos;
+ if (q == 1 || q == 3)
+ full_pos = ((u32)(0x4000u - inq) << 16) - frac_sub;
+ else
+ full_pos = ((u32)inq << 16) + frac_sub;
+
+ /* Split at octant boundary (45° = 8192 BAM = 8192*65536 sub-BAM) */
+ s32 raw;
+ if (full_pos < ((u32)FR_TAN_OCTANT << 16)) {
+ /* First octant [0, 45°): direct table lookup.
+ * 64 table intervals, each 2^23 sub-BAM units wide. */
+ u32 idx = full_pos >> 23;
+ u32 frac16 = (full_pos >> 7) & 0xFFFFu;
+
+ s32 lo = (s32)gFR_TAN_TAB_O[idx];
+ s32 hi = (s32)gFR_TAN_TAB_O[idx + 1];
+ raw = lo + (s32)(((s32)(hi - lo) * (s32)frac16 + (1 << 15)) >> 16);
+
+ if (raw < 0x40) {
+ /* Near zero: redo with 4 extra bits of precision */
+ s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4;
+ s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4;
+ raw = lo4 + (s32)(((s32)(hi4 - lo4) * (s32)frac16 + (1 << 15)) >> 16);
+ raw = (raw + 4) >> 3; /* u0.19 → s15.16 with rounding */
+ } else {
+ raw <<= 1; /* u0.15 → s15.16 */
+ }
+ } else {
+ /* Second octant [45°, 90°): tan(x) = 1 / tan(90° - x).
+ * Complement = distance from pole, in first-octant range. */
+ u32 comp = ((u32)FR_TRIG_QUADRANT << 16) - full_pos;
+
+ u32 idx = comp >> 23;
+ u32 frac16 = (comp >> 7) & 0xFFFFu;
+
+ s32 lo = (s32)gFR_TAN_TAB_O[idx];
+ s32 hi = (s32)gFR_TAN_TAB_O[idx + 1];
+ raw = lo + (s32)(((s32)(hi - lo) * (s32)frac16 + (1 << 15)) >> 16);
+
+ if (raw < 0x40) {
+ /* Near pole: redo with 4 extra bits then reciprocal */
+ s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4;
+ s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4;
+ s32 raw_hp = lo4 + (s32)(((s32)(hi4 - lo4) * (s32)frac16 + (1 << 15)) >> 16);
+ if (raw_hp < 32)
+ raw = FR_TRIG_MAXVAL;
+ else
+ raw = (s32)((0x80000000u / (u32)raw_hp) << 4);
+ } else {
+ raw = (s32)(0x80000000u / (u32)raw);
+ }
+ }
+ return raw;
+}
+
+s32 fr_tan(s32 rad, u16 radix)
+{
+ if (rad == 0) return 0;
+ /* tan(-x) = -tan(x): factor out sign, reduce positive */
+ s32 r = normalize_to_r16(rad, radix);
+ s32 tan_sign = 1;
+ if (r < 0) { r = -r; tan_sign = -1; }
+ r = reduce_to_2pi(r);
+ /* Near-π small angle: tan(π + δ) = tan(δ) ≈ δ. */
+ s32 delta = r - FR_PI(16);
+ if (delta >= -256 && delta <= 256) {
+ return (tan_sign < 0) ? -delta : delta;
+ }
+ /* Full pipeline */
+ if (r > FR_PI(16))
+ r -= FR_TWO_PI(16);
+ s32 bam_full = rad_to_bam_full(r);
+
+ /* Sign from quadrant of the BAM position */
+ u32 q = ((u32)((u16)(bam_full >> 16)) >> 14) & 0x3u;
+ s32 sign = (q == 1 || q == 3) ? -tan_sign : tan_sign;
+
+ s32 mag = tan_mag_from_bam_full(bam_full);
+ return (sign < 0) ? -mag : mag;
}
/*=======================================================
- * Integer-degree and fixed-radix-degree trig wrappers
+ * Degree-input trig: fr_cos_deg / fr_sin_deg convert to u16 BAM via fr_deg_to_bam
+ * and call the BAM-native functions; fr_tan_deg uses a full s32 BAM. Cardinal angles are exact.
*/
-s32 FR_Cos(s32 deg, u16 radix)
+
+s32 fr_cos_deg(s32 deg, u16 radix)
{
- u16 bam = (radix == 0) ? FR_DEG2BAM_I(deg) : (u16)((FR_DEG2BAM(deg) + (1 << (radix - 1))) >> radix);
- return fr_cos_bam(bam);
+ if (radix == 0) return fr_cos_bam(FR_DEG2BAM_I(deg));
+ if (deg < 0) deg = -deg;
+ /* Exact cardinal angles */
+ s32 frac_mask = (1 << radix) - 1;
+ if ((deg & frac_mask) == 0) {
+ s32 rem = (deg >> radix) % 360;
+ if (rem == 0) return FR_TRIG_ONE;
+ if (rem == 90) return 0;
+ if (rem == 180) return -FR_TRIG_ONE;
+ if (rem == 270) return 0;
+ }
+ /* Near 90° or 270° (cos=0 crossings): cos(90+δ) = -sin(δ) ≈ -δ·π/180,
+ * cos(270+δ) = sin(δ) ≈ δ·π/180. Avoids BAM rounding error at zero. */
+ s32 d = normalize_to_r16(deg, radix);
+ if (d >= FR_D360_R16) { s32 n = d / FR_D360_R16; d -= n * FR_D360_R16; }
+ {
+ const s32 DEG_THRESH = 14000; /* ~0.21° at r16 */
+ s32 delta = d - FR_D90_R16;
+ if (delta >= -DEG_THRESH && delta <= DEG_THRESH) {
+ s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16);
+ return -dr;
+ }
+ delta = d - (FR_D90_R16 + FR_D180_R16);
+ if (delta >= -DEG_THRESH && delta <= DEG_THRESH) {
+ s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16);
+ return dr;
+ }
+ }
+ return fr_cos_bam(fr_deg_to_bam(deg, radix));
}
-s32 FR_Sin(s32 deg, u16 radix)
+s32 fr_sin_deg(s32 deg, u16 radix)
{
- u16 bam = (radix == 0) ? FR_DEG2BAM_I(deg) : (u16)((FR_DEG2BAM(deg) + (1 << (radix - 1))) >> radix);
- return fr_sin_bam(bam);
+ if (radix == 0) return fr_sin_bam(FR_DEG2BAM_I(deg));
+ s32 sign = 1;
+ if (deg < 0) { deg = -deg; sign = -1; }
+ /* Exact cardinal angles */
+ s32 frac_mask = (1 << radix) - 1;
+ if ((deg & frac_mask) == 0) {
+ s32 rem = (deg >> radix) % 360;
+ if (rem == 0) return 0;
+ if (rem == 90) return (sign < 0) ? -FR_TRIG_ONE : FR_TRIG_ONE;
+ if (rem == 180) return 0;
+ if (rem == 270) return (sign < 0) ? FR_TRIG_ONE : -FR_TRIG_ONE;
+ }
+ s32 v = fr_sin_bam(fr_deg_to_bam(deg, radix));
+ return (sign < 0) ? -v : v;
}
s32 FR_TanI(s32 deg)
{
- /* Exact pole: deg mod 180 == ±90. Sign matches input sign
- * (positive deg → +MAXVAL, negative deg → -MAXVAL). */
+ /* Exact pole: deg mod 180 == ±90. Sign matches input sign. */
s32 rem = deg % 180;
if (rem == 90 || rem == -90)
return (deg > 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
return fr_tan_bam(FR_DEG2BAM_I(deg));
}
-s32 FR_Tan(s32 deg, u16 radix)
+/* Internal: range-reduce degrees and produce full s32 BAM (used by fr_tan_deg). */
+static s32 range_reduce_deg_bam_full(s32 deg, u16 radix)
{
- /* Check for exact integer poles before using the shift-only DEG2BAM
- * macro, which can map to the wrong BAM quadrant for large angles.
- * Only trigger when fractional bits are zero (exact pole). */
+ s32 d = normalize_to_r16(deg, radix);
+ if (d >= FR_D360_R16) {
+ s32 n = d / FR_D360_R16;
+ d -= n * FR_D360_R16;
+ }
+ if (d >= FR_D180_R16) d -= FR_D360_R16;
+ s32 offset = 0;
+ if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = (s32)0x80000000u; }
+ else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = (s32)0x80000000u; }
+ return offset + deg_to_bam_full(d);
+}
+
+s32 fr_tan_deg(s32 deg, u16 radix)
+{
+ if (radix == 0) return FR_TanI(deg);
+ /* tan(-x) = -tan(x): factor out sign, reduce positive */
+ s32 tan_sign = 1;
+ if (deg < 0) { deg = -deg; tan_sign = -1; }
+ /* Exact cardinal angles: tan is exactly 0 or ±MAXVAL */
s32 frac_mask = (1 << radix) - 1;
if ((deg & frac_mask) == 0) {
s32 deg_int = deg >> radix;
s32 rem = deg_int % 180;
- if (rem == 90 || rem == -90)
- return (deg >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+ if (rem == 0) return 0;
+ if (rem == 90) return tan_sign > 0 ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
}
- u16 bam = (radix == 0) ? FR_DEG2BAM_I(deg) : (u16)((FR_DEG2BAM(deg) + (1 << (radix - 1))) >> radix);
- return fr_tan_bam(bam);
+ s32 bam_full = range_reduce_deg_bam_full(deg, radix);
+ u32 q = ((u32)((u16)(bam_full >> 16)) >> 14) & 0x3u;
+ s32 sign = (q == 1 || q == 3) ? -tan_sign : tan_sign;
+ s32 mag = tan_mag_from_bam_full(bam_full);
+ return (sign < 0) ? -mag : mag;
}
/*=======================================================
@@ -321,11 +604,9 @@ s32 FR_FixAddSat(s32 x, s32 y)
/* FR_acos — returns radians at out_radix.
* Range: [0, pi]. Input is a cosine value at the given radix.
*
- * Uses the same 129-entry cosine table as fr_cos_bam, but in reverse:
- * binary-search to find the bracketing pair, then linear-interpolate
- * the fractional position between them to recover the full 14-bit
- * in-quadrant BAM. This mirrors the forward path and gives matching
- * precision (~1 LSB of s15.16 output).
+ * Uses the 129-entry sine table in reverse: binary-search the ascending
+ * table to find asin(|input|), then acos = pi/2 - asin (with sign handling
+ * for the second quadrant).
*/
s32 FR_acos(s32 input, u16 radix, u16 out_radix)
{
@@ -335,14 +616,11 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix)
s32 idx, d, num, frac;
s32 input_abs;
- /* Work with absolute value at the caller's radix — we'll need it for
- * the sqrt fast path before quantising to r15. */
+ /* Work with absolute value at the caller's radix */
sign = (s16)((input < 0) ? 1 : 0);
input_abs = sign ? -input : input;
- /* Clamp at the caller's radix — not at r15. Near ±1.0 the r15
- * quantisation can round to 32767 even when the caller has sub-LSB
- * precision that the sqrt fast path can use. */
+ /* Clamp at the caller's radix */
{
s32 one = (s32)1 << radix;
if (input_abs >= one)
@@ -351,16 +629,11 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix)
v = FR_CHRDX(input_abs, radix, FR_TRIG_PREC); /* |input| at s0.15 */
- /* Small-angle fast path: when cos(θ) is close to 1.0, the table
- * has only 2-8 LSBs of gap per entry, so linear interpolation is
- * very coarse. Use the identity acos(x) ≈ sqrt(2*(1-x)).
- *
- * Key: compute 1-x at the CALLER's radix, not r15. Near ±1.0 the
- * r15 quantisation crushes many distinct inputs to the same value
- * (cos(179.5°)..cos(179.9°) all round to 32767 at r15). The
- * caller's higher-radix bits carry the angular information via the
- * identity sin(θ) = sqrt(2(1-cos θ)) — effectively the sin trick. */
- if (v > gFR_COS_TAB_Q[7])
+ /* Small-angle fast path: when cos(θ) is close to 1.0, the sine table
+ * has poor resolution near the top (entries close together).
+ * Use acos(x) ≈ sqrt(2*(1-x)) instead. Threshold: v > sin_tab[121]
+ * means the input is > cos(7*π/256) ≈ 0.9963. */
+ if (v > gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE - 8])
{
s32 one = (s32)1 << radix;
s32 one_minus_x = one - input_abs; /* 1-|x| at caller radix */
@@ -372,35 +645,27 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix)
return rad_out;
}
- /* Below this point we need the sign-stripped r15 value for the
- * binary search. (v was already computed from input_abs above.) */
-
- /* Binary search on the cosine quadrant table. The table is
- * monotonically decreasing: gFR_COS_TAB_Q[0] = 32767 (cos 0°),
- * gFR_COS_TAB_Q[128] = 0 (cos 90°).
+ /* Binary search on the ascending sine table.
+ * gFR_SIN_TAB_Q[0] = 0 (sin 0°), gFR_SIN_TAB_Q[128] = 32768 (sin 90°).
*
- * After the search, lo is the first index where table[lo] <= v,
- * so the bracketing pair is (lo-1, lo) with table[lo-1] >= v >= table[lo].
- */
+ * Find the first index where table[idx] >= v. */
lo = 0;
hi = FR_TRIG_TABLE_SIZE;
while (lo < hi)
{
mid = (lo + hi) >> 1;
- if (gFR_COS_TAB_Q[mid] > v)
+ if ((s32)gFR_SIN_TAB_Q[mid] < v)
lo = mid + 1;
else
hi = mid;
}
- /* lo is now the index where table[lo] <= v. The bracketing interval
- * is [lo-1, lo] (table decreasing). Clamp idx to valid range.
- */
+ /* lo is now the first index where table[lo] >= v.
+ * The bracketing interval is [lo-1, lo] with table[lo-1] < v <= table[lo].
+ * This gives us the asin angle; acos = pi/2 - asin. */
idx = lo;
if (idx <= 0)
{
- /* v >= table[0] = 32767 — essentially cos(0), already clamped above
- * but guard anyway. */
idx = 0;
frac = 0;
}
@@ -411,26 +676,24 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix)
}
else
{
- /* Linear interpolate between table[idx-1] and table[idx].
- * d = table[idx-1] - table[idx] (>= 0, cos decreasing)
- * num = table[idx-1] - v (how far past table[idx-1])
- * frac = (num << FR_TRIG_FRAC_BITS) / d, in [0, FR_TRIG_FRAC_MAX)
- *
- * num and d are both in [0, 32767], so num << 7 fits in 22 bits.
+ /* Interpolate between table[idx-1] and table[idx].
+ * d = table[idx] - table[idx-1] (>= 0, sin increasing)
+ * num = v - table[idx-1] (how far past table[idx-1])
*/
- d = gFR_COS_TAB_Q[idx - 1] - gFR_COS_TAB_Q[idx];
- num = gFR_COS_TAB_Q[idx - 1] - v;
+ d = (s32)gFR_SIN_TAB_Q[idx] - (s32)gFR_SIN_TAB_Q[idx - 1];
+ num = v - (s32)gFR_SIN_TAB_Q[idx - 1];
if (d > 0)
frac = ((num << FR_TRIG_FRAC_BITS) + (d >> 1)) / d;
else
frac = 0;
- /* Reconstruct: the angle is at index (idx-1) + frac/FRAC_MAX,
- * so shift idx back by 1 for the BAM calculation below. */
idx = idx - 1;
}
{
- u16 bam = (u16)(((u32)idx << FR_TRIG_FRAC_BITS) + (u32)frac);
+ /* asin_bam is the angle in first-quadrant BAM whose sin = v */
+ u16 asin_bam = (u16)(((u32)idx << FR_TRIG_FRAC_BITS) + (u32)frac);
+ /* acos = pi/2 - asin (in BAM: quadrant - asin_bam) */
+ u16 bam = (u16)(FR_TRIG_QUADRANT - asin_bam);
if (sign)
bam = (u16)(FR_BAM_HALF - bam); /* mirror: pi - angle */
return FR_CHRDX(FR_Q2RAD(bam), 14, out_radix);
diff --git a/src/FR_math.h b/src/FR_math.h
index a251316..ca4b096 100644
--- a/src/FR_math.h
+++ b/src/FR_math.h
@@ -258,7 +258,7 @@ static inline s32 FR_div_rnd(s64 num, s32 den) {
/*================================================
* Constants used in Trig tables, definitions
*
- * FR_TRIG_PREC — internal table precision (s0.15, kept for table indexing)
+ * FR_TRIG_PREC — internal table precision (u0.15, sine table)
* FR_TRIG_OUT_PREC — output precision of sin/cos/tan (s15.16 since v2.0.1)
* FR_TRIG_ONE — exact 1.0 in output format (1 << 16 = 65536)
*
@@ -328,8 +328,10 @@ static inline s32 FR_div_rnd(s64 num, s32 den) {
#define FR_RAD2DEG(x) (((x) << 6) - ((x) << 3) + (x) + ((x) >> 2) + (((x) >> 4) - ((x) >> 6)) - ((x) >> 10))
/* FR_DEG2BAM(x): multiply by 65536/360 ≈ 182.0449 (7 terms, ~18 bits).
- * CAUTION: overflows s32 when |x| > ~256 deg at s15.16 (x<<7 term).
- * For safe conversion at any radix, use fr_deg_to_bam() instead. */
+ * Intermediate terms overflow s32 when |x| > ~256 deg at s15.16 (x<<7 term),
+ * which is benign in practice once the result is truncated to u16 BAM
+ * (the wrap is modular), though signed overflow is formally undefined in C.
+ * For an overflow-safe conversion to u16 BAM, use fr_deg_to_bam(). */
#define FR_DEG2BAM(x) (((x)<<7)+((x)<<6)-((x)<<3)-((x)<<1)+((x)>>5)+((x)>>6)-((x)>>9))
/* FR_BAM2DEG(x): multiply by 360/65536 = 0.00549316 (4 terms, exact) */
@@ -337,9 +339,9 @@ static inline s32 FR_div_rnd(s64 num, s32 den) {
/* FR_RAD2BAM(x): multiply by 65536/(2*pi) ≈ 10430.378 (7 terms, ~21 bits).
* CAUTION: overflows s32 when |x| > ~4 rad at s15.16 (x<<13 term).
- * For safe conversion at any radix, use fr_rad_to_bam() instead. */
-#define FR_RAD2BAM(x) (((x)<<13)+((x)<<11)+((x)<<7)+((x)<<6)-((x)<<1)+((x)>>1)-((x)>>3))
-
+ * For safe conversion at any radix, use fr_rad_to_bam() instead.
+ * #define FR_RAD2BAM(x) (((x)<<13)+((x)<<11)+((x)<<7)+((x)<<6)-((x)<<1)+((x)>>1)-((x)>>3)) */
+#define FR_RAD2BAM(x) (((x)<<13)+((x)<<11)+((x)<<7)+((x)<<6)-((x)<<1)+((x)>>1)-((x)>>3)+((x)>>8)-((x)>>11)-((x)>>14))
/* ── Overflow-safe rad/deg to BAM conversion functions ─────────────
*
* These replace the FR_RAD2BAM / FR_DEG2BAM macros for callers that
@@ -353,52 +355,25 @@ static inline s32 FR_div_rnd(s64 num, s32 den) {
* fr_deg_to_bam: reduce to [-90, 90) + quadrant offset. ±360° safe.
*/
-/* Constants at radix 16 */
-#define FR_PI_R16 205887 /* round(pi * 65536) */
-#define FR_TWO_PI_R16 411775 /* round(2*pi * 65536) */
-#define FR_D90_R16 5898240 /* 90 * 65536 */
-#define FR_D180_R16 11796480 /* 180 * 65536 */
-#define FR_D360_R16 23592960 /* 360 * 65536 */
+/* Pi constants at any radix: FR_PI(r) = round(pi * 2^r), etc.
+ * Compiler evaluates at compile time when r is a constant.
+ * Max safe radix: FR_PI r<=29, FR_TWO_PI r<=28, FR_HALF_PI r<=30, FR_THREE_HALF_PI r<=28. */
+#define FR_PI(r) ((s32)(3.14159265358979323846 * (1LL << (r)) + 0.5))
+#define FR_TWO_PI(r) ((s32)(6.28318530717958647692 * (1LL << (r)) + 0.5))
+#define FR_HALF_PI(r) ((s32)(1.57079632679489661923 * (1LL << (r)) + 0.5))
+#define FR_THREE_HALF_PI(r) ((s32)(4.71238898038468985769 * (1LL << (r)) + 0.5))
-static u16 __attribute__((unused)) fr_rad_to_bam(s32 rad, u16 radix)
-{
- /* Normalize to radix 16 */
- s32 r = (radix > 16) ? (rad >> (radix - 16))
- : (radix < 16) ? (rad << (16 - radix))
- : rad;
-
- /* Reduce to [-pi, pi] — one conditional pass, covers ±2*pi input */
- if (r > FR_PI_R16) r -= FR_TWO_PI_R16;
- if (r < -FR_PI_R16) r += FR_TWO_PI_R16;
-
- /* Shift terms reordered: interleave negatives early to keep all
- * intermediate sums within s32. Same 7-term decomposition as
- * FR_RAD2BAM, just reordered. Safe for |r| <= 205887 (pi). */
- s32 bam = (r<<13)-(r<<1)+(r<<11)-(r>>3)+(r<<7)+(r<<6)+(r>>1);
- return (u16)((bam + (1 << 15)) >> 16);
-}
+/* Convenience aliases at radix 16 */
+#define FR_PI_R16 FR_PI(16)
+#define FR_TWO_PI_R16 FR_TWO_PI(16)
-static u16 __attribute__((unused)) fr_deg_to_bam(s32 deg, u16 radix)
-{
- /* Normalize to radix 16 */
- s32 d = (radix > 16) ? (deg >> (radix - 16))
- : (radix < 16) ? (deg << (16 - radix))
- : deg;
-
- /* Reduce to [-180, 180) — covers ±360 input */
- if (d >= FR_D180_R16) d -= FR_D360_R16;
- if (d < -FR_D180_R16) d += FR_D360_R16;
-
- /* Reduce to [-90, 90) with BAM quadrant offset.
- * Needed because 182 * 11796480 (±180° at r16) overflows s32. */
- u16 offset = 0;
- if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = 32768; }
- else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = 32768; }
-
- /* |d| < 90° at r16. Max intermediate = 5898240 * 192 = 1.13B, safe. */
- s32 bam = (d<<7)+(d<<6)-(d<<3)-(d<<1)+(d>>5)+(d>>6)-(d>>9);
- return (u16)(offset + (u16)((bam + (1 << 15)) >> 16));
-}
+/* Degree constants at radix 16 (exact — no truncation) */
+#define FR_D90_R16 ((s32)90 << 16)
+#define FR_D180_R16 ((s32)180 << 16)
+#define FR_D360_R16 ((s32)360 << 16)
+
+ u16 fr_rad_to_bam(s32 rad, u16 radix);
+ u16 fr_deg_to_bam(s32 deg, u16 radix);
/* FR_BAM2RAD(x): multiply by 2*pi/65536 ≈ 0.0000959 (5 terms, ~18 bits) */
#define FR_BAM2RAD(x) (((x)>>13)-((x)>>15)+((x)>>18)+((x)>>21)+((x)>>25))
@@ -441,13 +416,25 @@ static u16 __attribute__((unused)) fr_deg_to_bam(s32 deg, u16 radix)
* fr_cos(rad, radix) — cos of radians at radix, s15.16 result
* fr_sin(rad, radix) — sin of radians at radix, s15.16 result
* fr_tan(rad, radix) — tan of radians at radix, s15.16 result
- * fr_cos_deg(deg) — cos of integer degrees, s15.16 result
- * fr_sin_deg(deg) — sin of integer degrees, s15.16 result
+ * fr_cos_deg(deg, radix) — cos of fixed-radix degrees, s15.16 result
+ * fr_sin_deg(deg, radix) — sin of fixed-radix degrees, s15.16 result
+ * fr_tan_deg(deg, radix) — tan of fixed-radix degrees, s15.16 result
*
* All go through the same 129-entry quadrant table with linear interpolation.
* Worst-case error: ~2 LSB in s15.16 (~3e-5 absolute), except at the four
* cardinal angles where the result is exact.
+ *
+ * FR_USE_EXTENDED_TRIG_PREC (default: ON) enables sub-BAM interpolation
+ * in fr_sin/fr_cos/fr_tan (the radian/degree-input functions). This adds
+ * one extra multiply per call but recovers ~16 bits of sub-BAM precision.
+ * To disable (faster, no multiply in the trig hot path):
+ *
+ * #define FR_USE_EXTENDED_TRIG_PREC 0
+ * #include "FR_math.h"
*/
+#ifndef FR_USE_EXTENDED_TRIG_PREC
+#define FR_USE_EXTENDED_TRIG_PREC 1
+#endif
s32 fr_cos_bam(u16 bam);
s32 fr_sin_bam(u16 bam);
s32 fr_tan_bam(u16 bam);
@@ -458,26 +445,32 @@ static u16 __attribute__((unused)) fr_deg_to_bam(s32 deg, u16 radix)
/* Integer degrees -> BAM using division (exact at all multiples of 45 deg). */
#define FR_DEG2BAM_I(deg) ((u16)((((s32)(deg) << 16) + ((deg) >= 0 ? 180 : -180)) / 360))
-#define fr_cos_deg(deg) fr_cos_bam(FR_DEG2BAM_I(deg))
-#define fr_sin_deg(deg) fr_sin_bam(FR_DEG2BAM_I(deg))
+/* Legacy single-arg integer-degree macros — use FR_CosI / FR_SinI instead */
+/* #define fr_cos_deg(deg) fr_cos_bam(FR_DEG2BAM_I(deg)) — removed, name reused for 2-arg function */
+/* #define fr_sin_deg(deg) fr_sin_bam(FR_DEG2BAM_I(deg)) — removed, name reused for 2-arg function */
/*===============================================
- * Integer-degree trig API (thin wrappers over the BAM-native path)
- *
- * FR_CosI(deg) — cos of integer degrees, s15.16 result
- * FR_SinI(deg) — sin of integer degrees, s15.16 result
- * FR_TanI(deg) — tan of integer degrees, s15.16 result
- * FR_Cos(deg, radix) — cos of fixed-radix degrees, s15.16 result
- * FR_Sin(deg, radix) — sin of fixed-radix degrees, s15.16 result
- * FR_Tan(deg, radix) — tan of fixed-radix degrees, s15.16 result
+ * Degree-input trig API
+ *
+ * FR_CosI(deg) — cos of integer degrees, s15.16 result
+ * FR_SinI(deg) — sin of integer degrees, s15.16 result
+ * FR_TanI(deg) — tan of integer degrees, s15.16 result
+ * fr_cos_deg(deg, radix) — cos of fixed-radix degrees, s15.16 result
+ * fr_sin_deg(deg, radix) — sin of fixed-radix degrees, s15.16 result
+ * fr_tan_deg(deg, radix) — tan of fixed-radix degrees, s15.16 result
*/
#define FR_CosI(deg) fr_cos_bam(FR_DEG2BAM_I(deg))
#define FR_SinI(deg) fr_sin_bam(FR_DEG2BAM_I(deg))
- s32 FR_Cos(s32 deg, u16 radix);
- s32 FR_Sin(s32 deg, u16 radix);
+ s32 fr_cos_deg(s32 deg, u16 radix);
+ s32 fr_sin_deg(s32 deg, u16 radix);
s32 FR_TanI(s32 deg);
- s32 FR_Tan(s32 deg, u16 radix);
+ s32 fr_tan_deg(s32 deg, u16 radix);
+
+ /* Legacy macros — use fr_sin_deg/fr_cos_deg/fr_tan_deg in new code */
+ #define FR_Sin fr_sin_deg
+ #define FR_Cos fr_cos_deg
+ #define FR_Tan fr_tan_deg
/* Inverse trig — output in radians at caller-specified radix (s32).
* FR_atan2 returns radians at radix 16 (s15.16).
diff --git a/src/FR_trig_table.h b/src/FR_trig_table.h
index 983c4e2..f57edd6 100644
--- a/src/FR_trig_table.h
+++ b/src/FR_trig_table.h
@@ -1,14 +1,14 @@
/**
- * @file FR_trig_table.h - 129-entry quadrant cosine table for FR_Math 2.0
+ * @file FR_trig_table.h - 129-entry quadrant sine table for FR_Math 2.0
*
* This table covers one quadrant [0, pi/2] inclusive in 128 intervals (so
* 129 entries). Indexed by a 7-bit BAM (binary angular measure) sub-index.
- * Used by fr_cos_bam / fr_sin_bam in FR_math.c.
+ * Used by fr_sin_bam / fr_cos_bam in FR_math.c.
*
- * Output format: s0.15 (signed, 15 fractional bits). So
- * gFR_COS_TAB_Q[0] = round(cos(0) * 32767) = 32767
- * gFR_COS_TAB_Q[64] = round(cos(pi/4) * 32767) ~ 23170
- * gFR_COS_TAB_Q[128] = round(cos(pi/2) * 32767) = 0
+ * Output format: u0.15 (unsigned, 15 fractional bits). So
+ * gFR_SIN_TAB_Q[0] = round(sin(0) * 32768) = 0
+ * gFR_SIN_TAB_Q[64] = round(sin(pi/4) * 32768) = 23170
+ * gFR_SIN_TAB_Q[128] = round(sin(pi/2) * 32768) = 32768
*
* Generated by tools/coef-gen.py — do not hand-edit.
*
@@ -27,7 +27,7 @@ extern "C" {
#define FR_TRIG_TABLE_BITS (7) /* log2(intervals) */
#define FR_TRIG_TABLE_SIZE ((1 << FR_TRIG_TABLE_BITS) + 1) /* entries = intervals + 1 */
-/* Derived constants for fr_cos_bam / fr_sin_bam.
+/* Derived constants for fr_sin_bam / fr_cos_bam.
*
* The BAM has 16 bits total: 2 top bits for quadrant, 14 bits in-quadrant.
* The in-quadrant value is split into (FR_TRIG_TABLE_BITS) table-index bits
@@ -44,24 +44,24 @@ extern "C" {
#define FR_TRIG_FRAC_HALF (FR_TRIG_FRAC_MAX >> 1) /* rounding bias */
#define FR_TRIG_QUADRANT (1 << 14) /* in-quadrant span */
-static const short gFR_COS_TAB_Q[FR_TRIG_TABLE_SIZE] = {
- 32767, 32765, 32757, 32745, 32728, 32705, 32678, 32646,
- 32609, 32567, 32521, 32469, 32412, 32351, 32285, 32213,
- 32137, 32057, 31971, 31880, 31785, 31685, 31580, 31470,
- 31356, 31237, 31113, 30985, 30852, 30714, 30571, 30424,
- 30273, 30117, 29956, 29791, 29621, 29447, 29268, 29085,
- 28898, 28706, 28510, 28310, 28105, 27896, 27683, 27466,
- 27245, 27019, 26790, 26556, 26319, 26077, 25832, 25582,
- 25329, 25072, 24811, 24547, 24279, 24007, 23731, 23452,
- 23170, 22884, 22594, 22301, 22005, 21705, 21403, 21096,
- 20787, 20475, 20159, 19841, 19519, 19195, 18868, 18537,
- 18204, 17869, 17530, 17189, 16846, 16499, 16151, 15800,
- 15446, 15090, 14732, 14372, 14010, 13645, 13279, 12910,
- 12539, 12167, 11793, 11417, 11039, 10659, 10278, 9896,
- 9512, 9126, 8739, 8351, 7962, 7571, 7179, 6786,
- 6393, 5998, 5602, 5205, 4808, 4410, 4011, 3612,
- 3212, 2811, 2410, 2009, 1608, 1206, 804, 401,
- 0
+static const unsigned short gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE] = {
+ 0, 402, 804, 1206, 1608, 2009, 2411, 2811,
+ 3212, 3612, 4011, 4410, 4808, 5205, 5602, 5998,
+ 6393, 6787, 7180, 7571, 7962, 8351, 8740, 9127,
+ 9512, 9896, 10279, 10660, 11039, 11417, 11793, 12167,
+ 12540, 12910, 13279, 13646, 14010, 14373, 14733, 15091,
+ 15447, 15800, 16151, 16500, 16846, 17190, 17531, 17869,
+ 18205, 18538, 18868, 19195, 19520, 19841, 20160, 20475,
+ 20788, 21097, 21403, 21706, 22006, 22302, 22595, 22884,
+ 23170, 23453, 23732, 24008, 24279, 24548, 24812, 25073,
+ 25330, 25583, 25833, 26078, 26320, 26557, 26791, 27020,
+ 27246, 27467, 27684, 27897, 28106, 28311, 28511, 28707,
+ 28899, 29086, 29269, 29448, 29622, 29792, 29957, 30118,
+ 30274, 30425, 30572, 30715, 30853, 30986, 31114, 31238,
+ 31357, 31471, 31581, 31686, 31786, 31881, 31972, 32058,
+ 32138, 32214, 32286, 32352, 32413, 32470, 32522, 32568,
+ 32610, 32647, 32679, 32706, 32729, 32746, 32758, 32766,
+ 32768
};
/* ---- Tangent table: 65 entries covering one octant [0, pi/4] ----
diff --git a/tests/test_tdd.cpp b/tests/test_tdd.cpp
index 3daaff9..f1d7c5d 100644
--- a/tests/test_tdd.cpp
+++ b/tests/test_tdd.cpp
@@ -58,7 +58,7 @@
* ============================================================ */
static inline double frd(s32 x, int radix) {
- return (double)x / (double)(1L << radix);
+ return (double)x / ldexp(1.0, radix);
}
typedef struct {
@@ -73,13 +73,19 @@ typedef struct {
double worst_pct_input; /* input that produced max pct error */
double worst_pct_actual;
double worst_pct_expected;
+ /* Clamped-denominator relative error: denom = max(|expected|, 1% of full_scale) */
+ double max_pct_err_clamped;
+ double sum_pct_err_clamped;
+ double worst_clamped_input;
+ double worst_clamped_actual;
+ double worst_clamped_expected;
} stats_t;
static void stats_reset(stats_t *s) {
memset(s, 0, sizeof(*s));
}
-static void stats_add(stats_t *s, double in, double actual, double expected) {
+static void stats_add(stats_t *s, double in, double actual, double expected, double full_scale) {
double e = actual - expected;
if (e < 0) e = -e;
if (s->n == 0 || e > s->max_abs_err) {
@@ -97,6 +103,17 @@ static void stats_add(stats_t *s, double in, double actual, double expected) {
s->worst_pct_expected = expected;
}
s->sum_pct_err += pct;
+ /* Clamped-denominator relative error: floor = 1% of full_scale */
+ double floor_val = 0.01 * full_scale;
+ double denom = fabs(expected) > floor_val ? fabs(expected) : floor_val;
+ double pct_clamped = (denom > 0.0) ? (e / denom) * 100.0 : 0.0;
+ if (pct_clamped > s->max_pct_err_clamped) {
+ s->max_pct_err_clamped = pct_clamped;
+ s->worst_clamped_input = in;
+ s->worst_clamped_actual = actual;
+ s->worst_clamped_expected = expected;
+ }
+ s->sum_pct_err_clamped += pct_clamped;
s->n++;
}
@@ -104,8 +121,8 @@ static double stats_mean(const stats_t *s) {
return s->n ? s->sum_abs_err / s->n : 0.0;
}
-static double stats_mean_pct(const stats_t *s) {
- return s->n ? s->sum_pct_err / s->n : 0.0;
+static double stats_mean_pct_clamped(const stats_t *s) {
+ return s->n ? s->sum_pct_err_clamped / s->n : 0.0;
}
/* Quantize a double to s15.16 resolution (same grid as library output). */
@@ -113,8 +130,13 @@ static inline double q16(double x) {
return floor(x * 65536.0 + 0.5) / 65536.0;
}
+/* Round-to-nearest float→fixed conversion (not truncation). */
+static inline s32 tofix(double v, int p) {
+ return (s32)floor(ldexp(v, p) + 0.5);
+}
+
/* Reference value for tan: libm tan() clamped to ±maxint as s15.16 double. */
-static const double TAN_CLAMP = (double)0x7fffffff / (double)(1L << 16);
+static const double TAN_CLAMP = (double)0x7fffffff / 65536.0;
static double tan_ref(double rad) {
double t = tan(rad);
@@ -129,9 +151,9 @@ static int g_showpeak = 0;
/* Print one accuracy table row, optionally with peak-error input */
static void acc_row(const char *name, const stats_t *s, const char *note) {
printf("| %s | %.4f | %.4f | %s",
- name, s->max_pct_err, stats_mean_pct(s), note);
+ name, s->max_pct_err_clamped, stats_mean_pct_clamped(s), note);
if (g_showpeak)
- printf(" | %.4g", s->worst_pct_input);
+ printf(" | %.4g", s->worst_clamped_input);
printf(" |\n");
}
@@ -647,8 +669,8 @@ static void section_arithmetic(void) {
};
for (int i = 0; i < (int)(sizeof(div_cases)/sizeof(div_cases[0])); i++) {
int r = div_cases[i].r;
- s32 xfp = (s32)(div_cases[i].xd * (1L << r));
- s32 yfp = (s32)(div_cases[i].yd * (1L << r));
+ s32 xfp = tofix(div_cases[i].xd, r);
+ s32 yfp = tofix(div_cases[i].yd, r);
double expected = div_cases[i].xd / div_cases[i].yd;
s32 d64 = FR_DIV(xfp, r, yfp, r);
s32 d32 = FR_DIV32(xfp, r, yfp, r);
@@ -681,8 +703,8 @@ static void section_trig_int(void) {
double exp_sin = sin(deg * M_PI / 180.0);
double act_cos = frd(FR_CosI((s16)deg), FR_TRIG_OUT_PREC);
double act_sin = frd(FR_SinI((s16)deg), FR_TRIG_OUT_PREC);
- stats_add(&cos_stats, deg, act_cos, exp_cos);
- stats_add(&sin_stats, deg, act_sin, exp_sin);
+ stats_add(&cos_stats, deg, act_cos, exp_cos, 1.0);
+ stats_add(&sin_stats, deg, act_sin, exp_sin, 1.0);
}
table_header_stats();
@@ -698,7 +720,7 @@ static void section_trig_int(void) {
if (deg % 90 == 0 && deg != 0) { tan_skipped++; continue; }
double exp_tan = tan(deg * M_PI / 180.0);
double act_tan = frd(FR_TanI((s16)deg), FR_TRIG_OUT_PREC);
- stats_add(&tan_stats, deg, act_tan, exp_tan);
+ stats_add(&tan_stats, deg, act_tan, exp_tan, TAN_CLAMP);
}
table_header_stats();
table_row_stats("FR_TanI [-89..89]", &tan_stats);
@@ -736,8 +758,8 @@ static void section_trig_frac(void) {
double exp_s = sin(deg_d * M_PI / 180.0);
double act_c = frd(FR_Cos(deg_fr, 8), FR_TRIG_OUT_PREC);
double act_s = frd(FR_Sin(deg_fr, 8), FR_TRIG_OUT_PREC);
- stats_add(&cos_f, deg_d, act_c, exp_c);
- stats_add(&sin_f, deg_d, act_s, exp_s);
+ stats_add(&cos_f, deg_d, act_c, exp_c, 1.0);
+ stats_add(&sin_f, deg_d, act_s, exp_s, 1.0);
}
table_header_stats();
table_row_stats("FR_Cos r8 0.25 step", &cos_f);
@@ -773,10 +795,11 @@ static void section_inverse_trig(void) {
/* radix 15 inputs, output radians at radix 16, 200 samples */
for (int i = -200; i <= 200; i++) {
double xd = i / 200.0;
- s32 fr = (s32)(xd * (1 << 15));
+ s32 fr = tofix(xd, 15);
+ double actual_xd = frd(fr, 15);
s32 rad = FR_acos(fr, 15, 16);
- double ref_rad = acos(xd);
- stats_add(&acos_stats, xd, frd(rad, 16), ref_rad);
+ double ref_rad = acos(actual_xd);
+ stats_add(&acos_stats, actual_xd, frd(rad, 16), ref_rad, M_PI);
}
table_header_stats();
table_row_stats("FR_acos vs acos() (rad)", &acos_stats);
@@ -787,10 +810,11 @@ static void section_inverse_trig(void) {
stats_reset(&asin_stats);
for (int i = -200; i <= 200; i++) {
double xd = i / 200.0;
- s32 fr = (s32)(xd * (1 << 15));
+ s32 fr = tofix(xd, 15);
+ double actual_xd = frd(fr, 15);
s32 rad = FR_asin(fr, 15, 16);
- double ref_rad = asin(xd);
- stats_add(&asin_stats, xd, frd(rad, 16), ref_rad);
+ double ref_rad = asin(actual_xd);
+ stats_add(&asin_stats, actual_xd, frd(rad, 16), ref_rad, M_PI);
}
table_header_stats();
table_row_stats("FR_asin vs asin() (rad)", &asin_stats);
@@ -826,13 +850,13 @@ static void section_pow_log(void) {
stats_t pow2_stats; stats_reset(&pow2_stats);
for (int i = 0; i < (int)(sizeof(pow2_inputs)/sizeof(pow2_inputs[0])); i++) {
double x = pow2_inputs[i];
- s32 fr = (s32)(x * (1L << 16));
+ s32 fr = tofix(x, 16);
s32 r = FR_pow2(fr, 16);
double rd = frd(r, 16);
double ref = pow(2.0, x);
double err = rd - ref; if (err < 0) err = -err;
double rel = ref != 0.0 ? err / fabs(ref) : err;
- stats_add(&pow2_stats, x, rd, ref);
+ stats_add(&pow2_stats, x, rd, ref, pow(2.0, 8.0));
printf("| %.4g | %ld | %.6g | %.6g | %.4g | %.4g |\n",
x, (long)r, rd, ref, err, rel);
}
@@ -845,11 +869,12 @@ static void section_pow_log(void) {
stats_t pow2_fine; stats_reset(&pow2_fine);
for (int i = -800; i <= 800; i++) {
double x = i / 100.0;
- s32 fr = (s32)(x * (1L << 16));
+ s32 fr = tofix(x, 16);
+ double actual_x = frd(fr, 16);
s32 r = FR_pow2(fr, 16);
double rd = frd(r, 16);
- double ref = pow(2.0, x);
- stats_add(&pow2_fine, x, rd, ref);
+ double ref = pow(2.0, actual_x);
+ stats_add(&pow2_fine, actual_x, rd, ref, pow(2.0, 8.0));
}
table_header_stats();
table_row_stats("FR_pow2 [-8,8] step 0.01", &pow2_fine);
@@ -884,7 +909,7 @@ static void section_pow_log(void) {
printf("| %ld | %u | %u | %ld | %.6g | %.6g |\n",
(long)log2_cases[i].in, log2_cases[i].r, log2_cases[i].or_,
(long)r, rd, log2_cases[i].ref);
- stats_add(&log2_stats, (double)log2_cases[i].in, rd, log2_cases[i].ref);
+ stats_add(&log2_stats, (double)log2_cases[i].in, rd, log2_cases[i].ref, log2(32000.0));
}
printf("\n");
table_header_stats();
@@ -897,11 +922,11 @@ static void section_pow_log(void) {
double ln_inputs[] = {1, 2, M_E, 4, 8, 10, 100, 1000};
stats_t ln_stats; stats_reset(&ln_stats);
for (int i = 0; i < (int)(sizeof(ln_inputs)/sizeof(ln_inputs[0])); i++) {
- s32 fr = (s32)(ln_inputs[i] * (1L << 16));
+ s32 fr = tofix(ln_inputs[i], 16);
s32 r = FR_ln(fr, 16, 16);
double rd = frd(r, 16);
double ref = log(ln_inputs[i]);
- stats_add(&ln_stats, ln_inputs[i], rd, ref);
+ stats_add(&ln_stats, ln_inputs[i], rd, ref, log(32000.0));
printf("| %.4g | %ld | %.6g | %.6g |\n", ln_inputs[i], (long)r, rd, ref);
}
printf("\n");
@@ -914,11 +939,11 @@ static void section_pow_log(void) {
double log10_inputs[] = {1, 2, 5, 10, 100, 1000, 10000};
stats_t log10_stats; stats_reset(&log10_stats);
for (int i = 0; i < (int)(sizeof(log10_inputs)/sizeof(log10_inputs[0])); i++) {
- s32 fr = (s32)(log10_inputs[i] * (1L << 16));
+ s32 fr = tofix(log10_inputs[i], 16);
s32 r = FR_log10(fr, 16, 16);
double rd = frd(r, 16);
double ref = log10(log10_inputs[i]);
- stats_add(&log10_stats, log10_inputs[i], rd, ref);
+ stats_add(&log10_stats, log10_inputs[i], rd, ref, log10(32000.0));
printf("| %.4g | %ld | %.6g | %.6g |\n", log10_inputs[i], (long)r, rd, ref);
}
printf("\n");
@@ -929,14 +954,14 @@ static void section_pow_log(void) {
md_h3("8.6 FR_EXP and FR_POW10 macros (wrap FR_pow2)");
printf("| Expression | Result | as double | Reference | Note |\n|---|---:|---:|---:|---|\n");
{
- s32 in = (s32)(1.0 * (1L << 16));
+ s32 in = tofix(1.0, 16);
s32 r = FR_EXP(in, 16);
double rd = frd(r, 16);
printf("| FR_EXP(1.0,16) | %ld | %.6g | %.6g | exp(1) = e |\n",
(long)r, rd, M_E);
}
{
- s32 in = (s32)(2.0 * (1L << 16));
+ s32 in = tofix(2.0, 16);
s32 r = FR_POW10(in, 16);
double rd = frd(r, 16);
printf("| FR_POW10(2.0,16) | %ld | %.6g | %.6g | 10^2 = 100 |\n",
@@ -1265,14 +1290,15 @@ static void section_v2_new(void) {
stats_t sqrt_stats; stats_reset(&sqrt_stats);
for (int i = 0; i < (int)(sizeof(sqrt_inputs)/sizeof(sqrt_inputs[0])); i++) {
double x = sqrt_inputs[i];
- s32 fr = (s32)(x * (1L << 16));
+ s32 fr = tofix(x, 16);
+ double actual_x = frd(fr, 16);
s32 r = FR_sqrt(fr, 16);
double rd = frd(r, 16);
- double ref = sqrt(x);
+ double ref = sqrt(actual_x);
double err = rd - ref; if (err < 0) err = -err;
- stats_add(&sqrt_stats, x, rd, ref);
+ stats_add(&sqrt_stats, actual_x, rd, ref, sqrt(32000.0));
printf("| %.6g | %ld | %.6g | %.6g | %.4g |\n",
- x, (long)r, rd, ref, err);
+ actual_x, (long)r, rd, ref, err);
}
printf("\n");
table_header_stats();
@@ -1283,11 +1309,12 @@ static void section_v2_new(void) {
stats_t sqrt_fine; stats_reset(&sqrt_fine);
for (int i = 1; i <= 1000; i++) {
double x = i * 10.0; /* 10..10000 */
- s32 fr = (s32)(x * (1L << 16));
+ s32 fr = tofix(x, 16);
+ double actual_x = frd(fr, 16);
s32 r = FR_sqrt(fr, 16);
double rd = frd(r, 16);
- double ref = sqrt(x);
- stats_add(&sqrt_fine, x, rd, ref);
+ double ref = sqrt(actual_x);
+ stats_add(&sqrt_fine, actual_x, rd, ref, sqrt(32000.0));
}
table_header_stats();
table_row_stats("FR_sqrt [10,10000]", &sqrt_fine);
@@ -1313,16 +1340,16 @@ static void section_v2_new(void) {
};
stats_t hyp_stats; stats_reset(&hyp_stats);
for (int i = 0; i < (int)(sizeof(hyp_cases)/sizeof(hyp_cases[0])); i++) {
- s32 fx = (s32)(hyp_cases[i].x * (1L << 16));
- s32 fy = (s32)(hyp_cases[i].y * (1L << 16));
+ s32 fx = tofix(hyp_cases[i].x, 16);
+ s32 fy = tofix(hyp_cases[i].y, 16);
+ double actual_x = frd(fx, 16), actual_y = frd(fy, 16);
s32 r = FR_hypot(fx, fy, 16);
double rd = frd(r, 16);
- double ref = hypot(hyp_cases[i].x, hyp_cases[i].y);
+ double ref = hypot(actual_x, actual_y);
double err = rd - ref; if (err < 0) err = -err;
- stats_add(&hyp_stats, sqrt(hyp_cases[i].x*hyp_cases[i].x + hyp_cases[i].y*hyp_cases[i].y),
- rd, ref);
+ stats_add(&hyp_stats, ref, rd, ref, hypot(1000.0, 1000.0));
printf("| %g | %g | %ld | %.6g | %.6g | %.4g |\n",
- hyp_cases[i].x, hyp_cases[i].y, (long)r, rd, ref, err);
+ actual_x, actual_y, (long)r, rd, ref, err);
}
printf("\n");
table_header_stats();
@@ -1334,17 +1361,17 @@ static void section_v2_new(void) {
printf("|---:|---:|---:|---:|---:|---:|---:|\n");
stats_t hf8_stats; stats_reset(&hf8_stats);
for (int i = 0; i < (int)(sizeof(hyp_cases)/sizeof(hyp_cases[0])); i++) {
- s32 fx = (s32)(hyp_cases[i].x * (1L << 16));
- s32 fy = (s32)(hyp_cases[i].y * (1L << 16));
+ s32 fx = tofix(hyp_cases[i].x, 16);
+ s32 fy = tofix(hyp_cases[i].y, 16);
+ double actual_x = frd(fx, 16), actual_y = frd(fy, 16);
s32 r = FR_hypot_fast8(fx, fy);
double rd = frd(r, 16);
- double ref = hypot(hyp_cases[i].x, hyp_cases[i].y);
+ double ref = hypot(actual_x, actual_y);
double err = rd - ref; if (err < 0) err = -err;
double rel = (ref > 0) ? err / ref * 100.0 : 0.0;
- stats_add(&hf8_stats, sqrt(hyp_cases[i].x*hyp_cases[i].x + hyp_cases[i].y*hyp_cases[i].y),
- rd, ref);
+ stats_add(&hf8_stats, ref, rd, ref, hypot(1000.0, 1000.0));
printf("| %g | %g | %ld | %.6g | %.6g | %.4g | %.4g |\n",
- hyp_cases[i].x, hyp_cases[i].y, (long)r, rd, ref, err, rel);
+ actual_x, actual_y, (long)r, rd, ref, err, rel);
}
printf("\n");
table_header_stats();
@@ -1400,7 +1427,7 @@ static void section_v2_new(void) {
else if (t < 0.50) ideal = 2.0 - 4.0 * t; /* 1 → 0 */
else if (t < 0.75) ideal = -4.0 * (t - 0.5); /* 0 → -1 */
else ideal = -1.0 + 4.0 * (t - 0.75); /* -1 → 0 */
- stats_add(&tri_stats, t * 360.0, (double)actual / 32767.0, ideal);
+ stats_add(&tri_stats, t * 360.0, (double)actual / 32767.0, ideal, 1.0);
}
table_header_stats();
table_row_stats("fr_wave_tri vs ideal", &tri_stats);
@@ -1486,8 +1513,8 @@ static void section_multiradix(void) {
int log2_radixes[] = {8, 12, 16, 24};
for (int ri = 0; ri < 4; ri++) {
int R = log2_radixes[ri];
- double scale = (double)(1L << R);
- double max_val = (double)((1L << (30 - R))); /* stay well within s32 */
+ double scale = ldexp(1.0, R);
+ double max_val = ldexp(1.0, 30 - R); /* stay well within s32 */
stats_t st; stats_reset(&st);
/* Sweep from 0.125 to max representable value */
@@ -1498,24 +1525,26 @@ static void section_multiradix(void) {
for (int i = 0; i < ninp; i++) {
if (inputs[i] > max_val) continue; /* would overflow s32 */
- s32 fr = (s32)(inputs[i] * scale);
+ s32 fr = tofix(inputs[i], R);
if (fr <= 0) continue;
+ double actual_x = frd(fr, R);
s32 r = FR_log2(fr, (u16)R, (u16)R);
double rd = frd(r, R);
- double ref = log2(inputs[i]);
- stats_add(&st, inputs[i], rd, ref);
+ double ref = log2(actual_x);
+ stats_add(&st, actual_x, rd, ref, log2(32000.0));
}
/* Fine-grained sweep in [1, min(100, max_val)] */
double sweep_max = max_val < 100.0 ? max_val : 100.0;
for (int i = 1; i <= 500; i++) {
double x = 1.0 + ((sweep_max - 1.0) * i / 500.0);
- s32 fr = (s32)(x * scale);
+ s32 fr = tofix(x, R);
if (fr <= 0) continue;
+ double actual_x = frd(fr, R);
s32 r = FR_log2(fr, (u16)R, (u16)R);
double rd = frd(r, R);
- double ref = log2(x);
- stats_add(&st, x, rd, ref);
+ double ref = log2(actual_x);
+ stats_add(&st, actual_x, rd, ref, log2(32000.0));
}
double lsb = 1.0 / scale;
@@ -1535,19 +1564,20 @@ static void section_multiradix(void) {
for (int ri = 0; ri < 4; ri++) {
int R = log2_radixes[ri];
- double scale = (double)(1L << R);
- double max_val = (double)((1L << (30 - R)));
+ double scale = ldexp(1.0, R);
+ double max_val = ldexp(1.0, 30 - R);
double sweep_max = max_val < 100.0 ? max_val : 100.0;
stats_t st; stats_reset(&st);
for (int i = 1; i <= 500; i++) {
double x = 0.5 + ((sweep_max - 0.5) * i / 500.0);
- s32 fr = (s32)(x * scale);
+ s32 fr = tofix(x, R);
if (fr <= 0) continue;
+ double actual_x = frd(fr, R);
s32 r = FR_ln(fr, (u16)R, (u16)R);
double rd = frd(r, R);
- double ref = log(x);
- stats_add(&st, x, rd, ref);
+ double ref = log(actual_x);
+ stats_add(&st, actual_x, rd, ref, log(32000.0));
}
double lsb = 1.0 / scale;
@@ -1567,19 +1597,20 @@ static void section_multiradix(void) {
for (int ri = 0; ri < 4; ri++) {
int R = log2_radixes[ri];
- double scale = (double)(1L << R);
- double max_val = (double)((1L << (30 - R)));
+ double scale = ldexp(1.0, R);
+ double max_val = ldexp(1.0, 30 - R);
double sweep_max = max_val < 1000.0 ? max_val : 1000.0;
stats_t st; stats_reset(&st);
for (int i = 1; i <= 500; i++) {
double x = 0.5 + ((sweep_max - 0.5) * i / 500.0);
- s32 fr = (s32)(x * scale);
+ s32 fr = tofix(x, R);
if (fr <= 0) continue;
+ double actual_x = frd(fr, R);
s32 r = FR_log10(fr, (u16)R, (u16)R);
double rd = frd(r, R);
- double ref = log10(x);
- stats_add(&st, x, rd, ref);
+ double ref = log10(actual_x);
+ stats_add(&st, actual_x, rd, ref, log10(32000.0));
}
double lsb = 1.0 / scale;
@@ -1600,8 +1631,8 @@ static void section_multiradix(void) {
int div_radixes[] = {8, 12, 16, 20};
for (int ri = 0; ri < 4; ri++) {
int R = div_radixes[ri];
- double scale = (double)(1L << R);
- double max_val = (double)(1L << (30 - R)); /* stay within s32 */
+ double scale = ldexp(1.0, R);
+ double max_val = ldexp(1.0, 30 - R); /* stay within s32 */
stats_t st_rnd, st_trunc;
stats_reset(&st_rnd);
stats_reset(&st_trunc);
@@ -1620,18 +1651,18 @@ static void section_multiradix(void) {
double aq = ay > 0 ? ax / ay : 1e30;
/* Skip if inputs or quotient would overflow s32 at this radix */
if (ax >= max_val || ay >= max_val || aq >= max_val) continue;
- s32 xfp = (s32)(x * scale);
- s32 yfp = (s32)(y * scale);
+ s32 xfp = tofix(x, R);
+ s32 yfp = tofix(y, R);
if (yfp == 0) continue;
- double ref = x / y;
+ double ref = frd(xfp, R) / frd(yfp, R);
s32 d_rnd = FR_DIV(xfp, R, yfp, R);
s32 d_trunc = FR_DIV_TRUNC(xfp, R, yfp, R);
double rd_rnd = frd(d_rnd, R);
double rd_trunc = frd(d_trunc, R);
- stats_add(&st_rnd, x / y, rd_rnd, ref);
- stats_add(&st_trunc, x / y, rd_trunc, ref);
+ stats_add(&st_rnd, x / y, rd_rnd, ref, 1.0);
+ stats_add(&st_trunc, x / y, rd_trunc, ref, 1.0);
}
}
@@ -1658,9 +1689,9 @@ static void section_multiradix(void) {
};
for (int i = 0; i < (int)(sizeof(sign_cases)/sizeof(sign_cases[0])); i++) {
int R = sign_cases[i].r;
- double scale = (double)(1L << R);
- s32 xfp = (s32)(sign_cases[i].x * scale);
- s32 yfp = (s32)(sign_cases[i].y * scale);
+ double scale = ldexp(1.0, R);
+ s32 xfp = tofix(sign_cases[i].x, R);
+ s32 yfp = tofix(sign_cases[i].y, R);
s32 d = FR_DIV(xfp, R, yfp, R);
double rd = frd(d, R);
double ref = sign_cases[i].x / sign_cases[i].y;
@@ -1683,7 +1714,7 @@ static void section_multiradix(void) {
int exp_radixes[] = {8, 12, 16, 20};
for (int ri = 0; ri < 4; ri++) {
int R = exp_radixes[ri];
- double scale = (double)(1L << R);
+ double scale = ldexp(1.0, R);
stats_t st_exp, st_pow10;
stats_reset(&st_exp);
stats_reset(&st_pow10);
@@ -1691,23 +1722,25 @@ static void section_multiradix(void) {
/* Sweep exp(x) for x in [-4, 4] in steps of 0.05 */
for (int i = -80; i <= 80; i++) {
double x = i / 20.0;
- s32 fr = (s32)(x * scale);
+ s32 fr = tofix(x, R);
+ double actual_x = frd(fr, R);
s32 r = FR_EXP(fr, R);
double rd = frd(r, R);
- double ref = exp(x);
- if (r != FR_OVERFLOW_POS && ref < (double)(1L << (31 - R)))
- stats_add(&st_exp, x, rd, ref);
+ double ref = exp(actual_x);
+ if (r != FR_OVERFLOW_POS && ref < ldexp(1.0, 31 - R))
+ stats_add(&st_exp, actual_x, rd, ref, 32000.0);
}
/* Sweep pow10(x) for x in [-2, 2] in steps of 0.05 */
for (int i = -40; i <= 40; i++) {
double x = i / 20.0;
- s32 fr = (s32)(x * scale);
+ s32 fr = tofix(x, R);
+ double actual_x = frd(fr, R);
s32 r = FR_POW10(fr, R);
double rd = frd(r, R);
- double ref = pow(10.0, x);
- if (r != FR_OVERFLOW_POS && ref < (double)(1L << (31 - R)))
- stats_add(&st_pow10, x, rd, ref);
+ double ref = pow(10.0, actual_x);
+ if (r != FR_OVERFLOW_POS && ref < ldexp(1.0, 31 - R))
+ stats_add(&st_pow10, actual_x, rd, ref, 32000.0);
}
double lsb = 1.0 / scale;
@@ -1794,20 +1827,92 @@ static void section_summary(void) {
* README.md, docs/README.md, and pages/index.html.
* ============================================================ */
+/* ── Neighborhood printer ──────────────────────────────────────────
+ * Print the 2*half+1 sweep samples around index center_i as a markdown table.
+ * func_type selects the function to evaluate and how index i maps to its input:
+ * 0 = fr_sin_bam 1 = fr_cos_bam 2 = fr_tan_bam (i used directly as u16 BAM)
+ * 3 = fr_sin 4 = fr_cos 5 = fr_tan (radians, radix 16)
+ * 6 = FR_SinI 7 = FR_CosI 8 = FR_TanI (integer degrees, (int)range_lo + i)
+ * 9 = FR_Sin 10 = FR_Cos 11 = FR_Tan (degrees, radix 16; any other value lands here as tan)
+ */
+static void neighborhood(const char *label, int func_type,
+ int center_i, int half, int N,
+ double range_lo, double range_hi)
+{
+ printf("\n**Neighborhood: %s (center i=%d ±%d)**\n\n", label, center_i, half);
+ printf("| i | deg | input_fp | expected | got | abs_err | pct_err |\n");
+ printf("|---|---|---|---|---|---|---|\n");
+
+ for (int k = -half; k <= half; k++) {
+ int i = (center_i + k % N + N) % N; /* wrap past either end of the sweep; assumes N > 0 and 0 <= center_i < N — TODO confirm callers */
+ double deg, angle, exp_v, got_v;
+ s32 fp;
+
+ switch (func_type) {
+ case 0: case 1: case 2: { /* BAM: i truncated to u16 (0..65535); range_lo/range_hi are not used here */
+ u16 bam = (u16)i;
+ deg = bam * 360.0 / 65536.0;
+ angle = deg * M_PI / 180.0;
+ if (func_type == 0) { exp_v = q16(sin(angle)); got_v = frd(fr_sin_bam(bam), 16); }
+ else if (func_type == 1) { exp_v = q16(cos(angle)); got_v = frd(fr_cos_bam(bam), 16); }
+ else { exp_v = q16(tan_ref(angle)); got_v = frd(fr_tan_bam(bam), 16); }
+ fp = (s32)bam;
+ break;
+ }
+ case 3: case 4: case 5: { /* radians: interpolated from range_lo toward range_hi in N steps (original note: ±2π, 131072 pts — confirm against callers) */
+ angle = range_lo + (range_hi - range_lo) * i / (double)N;
+ fp = tofix(angle, 16);
+ double actual_angle = frd(fp, 16); /* the radix-16 input converted back, so the reference uses the same angle the library call receives */
+ deg = actual_angle * 180.0 / M_PI;
+ if (func_type == 3) { exp_v = q16(sin(actual_angle)); got_v = frd(fr_sin(fp, 16), 16); }
+ else if (func_type == 4) { exp_v = q16(cos(actual_angle)); got_v = frd(fr_cos(fp, 16), 16); }
+ else { exp_v = q16(tan_ref(actual_angle)); got_v = frd(fr_tan(fp, 16), 16); }
+ break;
+ }
+ case 6: case 7: case 8: { /* integer degrees: d = (int)range_lo + i; range_hi is not used here */
+ int d = (int)range_lo + i;
+ deg = (double)d;
+ angle = d * M_PI / 180.0;
+ fp = (s32)d;
+ if (func_type == 6) { exp_v = q16(sin(angle)); got_v = frd(FR_SinI(d), 16); }
+ else if (func_type == 7) { exp_v = q16(cos(angle)); got_v = frd(FR_CosI(d), 16); }
+ else { exp_v = q16(tan_ref(angle)); got_v = frd(FR_TanI((s16)d), 16); }
+ break;
+ }
+ default: { /* fixed-radix degrees: interpolated from range_lo toward range_hi in N steps (original note: ±360, 131072 pts — confirm against callers) */
+ deg = range_lo + (range_hi - range_lo) * i / (double)N;
+ fp = tofix(deg, 16);
+ double actual_deg = frd(fp, 16); /* reference angle is derived from the radix-16 input; the printed deg column stays the unquantized value */
+ angle = actual_deg * M_PI / 180.0;
+ if (func_type == 9) { exp_v = q16(sin(angle)); got_v = frd(FR_Sin(fp, 16), 16); }
+ else if (func_type == 10) { exp_v = q16(cos(angle)); got_v = frd(FR_Cos(fp, 16), 16); }
+ else { exp_v = q16(tan_ref(angle)); got_v = frd(FR_Tan(fp, 16), 16); }
+ break;
+ }
+ }
+
+ double ae = fabs(got_v - exp_v);
+ double pe = (exp_v != 0.0) ? ae / fabs(exp_v) * 100.0 : (ae != 0.0 ? 100.0 : 0.0); /* zero reference: report 100% if there is any error, else 0% */
+ printf("| %d | %.6f | %d | %.6f | %.6f | %.6f | %.4f%% |\n",
+ i, deg, (int)fp, exp_v, got_v, ae, pe);
+ }
+ printf("\n");
+}
+
static void section_accuracy_table(void) {
md_h2("14. Accuracy Summary Table");
printf("\n");
if (g_showpeak) {
- printf("| Function | Max err (%%) | Avg err (%%) | Note | Peak at |\n");
+ printf("| Function | Max err (%%)*| Avg err (%%) | Note | Peak at |\n");
printf("|---|---:|---:|---|---:|\n");
} else {
- printf("| Function | Max err (%%) | Avg err (%%) | Note |\n");
+ printf("| Function | Max err (%%)*| Avg err (%%) | Note |\n");
printf("|---|---:|---:|---|\n");
}
const int R = 16;
- const double scale = (double)(1L << R);
+ const double scale = ldexp(1.0, R);
/* Persistent stats so we can print diagnostics after the table */
stats_t st_sincos, st_tan, st_asincos, st_atan2;
@@ -1823,31 +1928,33 @@ static void section_accuracy_table(void) {
for (int i = 0; i < 65536; i++) {
u16 bam = (u16)i;
double rad = bam * 2.0 * M_PI / 65536.0;
- stats_add(&st, (double)bam, frd(fr_sin_bam(bam), FR_TRIG_OUT_PREC), q16(sin(rad)));
- stats_add(&st, (double)bam, frd(fr_cos_bam(bam), FR_TRIG_OUT_PREC), q16(cos(rad)));
+ stats_add(&st, (double)bam, frd(fr_sin_bam(bam), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0);
+ stats_add(&st, (double)bam, frd(fr_cos_bam(bam), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0);
}
acc_row("sin/cos (BAM)", &st, "fr_sin_bam/fr_cos_bam direct; 129-entry table");
}
- /* --- sin / cos (degree wrappers: 65536-pt) --- */
+ /* --- sin / cos (degree wrappers: 65536-pt at s15.16) --- */
{
stats_t &st = st_sincos;
- const u16 radix = 7; /* s8.7 degrees: 128 steps/deg, [-256°,+256°) */
- for (int i = -32768; i <= 32767; i++) {
- double deg = (double)i / (1 << radix);
- double rad = deg * M_PI / 180.0;
- stats_add(&st, deg, frd(FR_Sin((s16)i, radix), FR_TRIG_OUT_PREC), q16(sin(rad)));
- stats_add(&st, deg, frd(FR_Cos((s16)i, radix), FR_TRIG_OUT_PREC), q16(cos(rad)));
+ const u16 radix = 16;
+ for (int i = 0; i < 65536; i++) {
+ double deg = -360.0 + (720.0 * i / 65536.0);
+ s32 deg_fp = tofix(deg, radix);
+ double actual_deg = frd(deg_fp, radix);
+ double rad = actual_deg * M_PI / 180.0;
+ stats_add(&st, actual_deg, frd(FR_Sin(deg_fp, radix), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0);
+ stats_add(&st, actual_deg, frd(FR_Cos(deg_fp, radix), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0);
}
s16 specials[] = {0,30,45,60,90,120,135,150,180,210,225,240,270,300,315,330,360,
-30,-45,-60,-90,-120,-135,-150,-180,-210,-225,-240,-270,-300,-315,-330,-360};
for (int si = 0; si < (int)(sizeof(specials)/sizeof(specials[0])); si++) {
s16 d = specials[si];
double rad = d * M_PI / 180.0;
- stats_add(&st, d, frd(FR_SinI(d), FR_TRIG_OUT_PREC), q16(sin(rad)));
- stats_add(&st, d, frd(FR_CosI(d), FR_TRIG_OUT_PREC), q16(cos(rad)));
+ stats_add(&st, d, frd(FR_SinI(d), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0);
+ stats_add(&st, d, frd(FR_CosI(d), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0);
}
- acc_row("sin/cos (deg)", &st, "FR_Sin/FR_Cos ±256° (s16 at radix 7; FR_DEG2BAM)");
+ acc_row("sin/cos (deg)", &st, "FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM");
}
/* --- sin / cos (radian wrappers: 65536-pt) --- */
@@ -1855,9 +1962,10 @@ static void section_accuracy_table(void) {
stats_t st; stats_reset(&st);
for (int i = 0; i < 65536; i++) {
double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0);
- s32 rad_fp = (s32)(angle * (1L << 16));
- stats_add(&st, angle, frd(fr_sin(rad_fp, 16), FR_TRIG_OUT_PREC), q16(sin(angle)));
- stats_add(&st, angle, frd(fr_cos(rad_fp, 16), FR_TRIG_OUT_PREC), q16(cos(angle)));
+ s32 rad_fp = tofix(angle, 16);
+ double actual_angle = frd(rad_fp, 16);
+ stats_add(&st, actual_angle, frd(fr_sin(rad_fp, 16), FR_TRIG_OUT_PREC), q16(sin(actual_angle)), 1.0);
+ stats_add(&st, actual_angle, frd(fr_cos(rad_fp, 16), FR_TRIG_OUT_PREC), q16(cos(actual_angle)), 1.0);
}
acc_row("sin/cos (rad)", &st, "fr_sin/fr_cos via fr_rad_to_bam ±2π r16");
}
@@ -1871,27 +1979,29 @@ static void section_accuracy_table(void) {
if (bam == 16384) ref = TAN_CLAMP; /* 90°: +maxint */
else if (bam == 49152) ref = -TAN_CLAMP; /* 270°: -maxint */
else ref = tan_ref(bam * 2.0 * M_PI / 65536.0);
- stats_add(&st, (double)bam, frd(fr_tan_bam(bam), FR_TRIG_OUT_PREC), q16(ref));
+ stats_add(&st, (double)bam, frd(fr_tan_bam(bam), FR_TRIG_OUT_PREC), q16(ref), TAN_CLAMP);
}
acc_row("tan (BAM)", &st, "fr_tan_bam 65536-pt full; ±maxint at poles");
}
- /* --- tan (degree wrappers: 65536-pt, full sweep) --- */
+ /* --- tan (degree wrappers: 65536-pt at s15.16, full sweep) --- */
{
stats_t &st = st_tan;
- const u16 radix = 7;
- for (int i = -32768; i <= 32767; i++) {
- double deg = (double)i / (1 << radix);
- double rad = deg * M_PI / 180.0;
- stats_add(&st, deg, frd(FR_Tan((s16)i, radix), FR_TRIG_OUT_PREC), q16(tan_ref(rad)));
+ const u16 radix = 16;
+ for (int i = 0; i < 65536; i++) {
+ double deg = -360.0 + (720.0 * i / 65536.0);
+ s32 deg_fp = tofix(deg, radix);
+ double actual_deg = frd(deg_fp, radix);
+ double rad = actual_deg * M_PI / 180.0;
+ stats_add(&st, actual_deg, frd(FR_Tan(deg_fp, radix), FR_TRIG_OUT_PREC), q16(tan_ref(rad)), TAN_CLAMP);
}
s16 specials[] = {0,30,45,60,-30,-45,-60,120,135,150,-120,-135,-150};
for (int si = 0; si < (int)(sizeof(specials)/sizeof(specials[0])); si++) {
s16 d = specials[si];
double rad = d * M_PI / 180.0;
- stats_add(&st, d, frd(FR_TanI(d), FR_TRIG_OUT_PREC), q16(tan_ref(rad)));
+ stats_add(&st, d, frd(FR_TanI(d), FR_TRIG_OUT_PREC), q16(tan_ref(rad)), TAN_CLAMP);
}
- acc_row("tan (deg)", &st, "FR_Tan ±256° full (s16 at radix 7; FR_DEG2BAM); sat at poles");
+ acc_row("tan (deg)", &st, "FR_Tan ±360° s15.16 full; sat at poles");
}
/* --- tan (radian wrappers: 65536-pt, full sweep) --- */
@@ -1899,8 +2009,9 @@ static void section_accuracy_table(void) {
stats_t st; stats_reset(&st);
for (int i = 0; i < 65536; i++) {
double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0);
- s32 rad_fp = (s32)(angle * (1L << 16));
- stats_add(&st, angle, frd(fr_tan(rad_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(angle)));
+ s32 rad_fp = tofix(angle, 16);
+ double actual_angle = frd(rad_fp, 16);
+ stats_add(&st, actual_angle, frd(fr_tan(rad_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(actual_angle)), TAN_CLAMP);
}
acc_row("tan (rad)", &st, "fr_tan ±2π r16 full; sat at poles");
}
@@ -1910,12 +2021,12 @@ static void section_accuracy_table(void) {
stats_t &st = st_asincos;
/* 65536-point sweep: all representable values at radix 15 over [-1, +1) */
for (int i = -32768; i <= 32767; i++) {
- double xd = (double)i / (1 << 15);
+ double xd = (double)i / 32768.0;
if (xd < -1.0 || xd > 1.0) continue;
s32 rad = FR_asin((s32)i, 15, R);
- stats_add(&st, xd, frd(rad, R), q16(asin(xd)));
+ stats_add(&st, xd, frd(rad, R), q16(asin(xd)), M_PI);
rad = FR_acos((s32)i, 15, R);
- stats_add(&st, xd, frd(rad, R), q16(acos(xd)));
+ stats_add(&st, xd, frd(rad, R), q16(acos(xd)), M_PI);
}
acc_row("asin / acos", &st, "65536-pt; sqrt approx near boundary");
}
@@ -1936,19 +2047,19 @@ static void section_accuracy_table(void) {
for (int i = -32767; i <= 32768; i++) {
double angle = i * M_PI / 32768.0;
double x = rad * cos(angle), y = rad * sin(angle);
- s32 fx = (s32)(x * scale);
- s32 fy = (s32)(y * scale);
+ s32 fx = tofix(x, R);
+ s32 fy = tofix(y, R);
if (fx == 0 && fy == 0) continue;
s32 afx = (fx < 0) ? -fx : fx;
s32 afy = (fy < 0) ? -fy : fy;
s32 minor = (afx < afy) ? afx : afy;
if (minor < 256) continue; /* input quantization, not algo */
s32 r = FR_atan2(fy, fx, R);
- double ref = atan2(y, x);
+ double ref = atan2((double)fy, (double)fx);
/* Skip near ±pi branch cut: sign depends on sub-LSB
* input quantization, not algorithm accuracy. */
if (fabs(fabs(ref) - M_PI) < 0.01) continue;
- stats_add(&st, angle * 180.0 / M_PI, frd(r, R), q16(ref));
+ stats_add(&st, angle * 180.0 / M_PI, frd(r, R), q16(ref), M_PI);
}
}
/* Special cases: exact quadrant/octant/30-degree angles */
@@ -1957,10 +2068,10 @@ static void section_accuracy_table(void) {
for (int si = 0; si < (int)(sizeof(specials_deg)/sizeof(specials_deg[0])); si++) {
double angle = specials_deg[si] * M_PI / 180.0;
double x = 100.0 * cos(angle), y = 100.0 * sin(angle);
- s32 fx = (s32)(x * scale), fy = (s32)(y * scale);
+ s32 fx = tofix(x, R), fy = tofix(y, R);
if (fx == 0 && fy == 0) continue;
s32 r = FR_atan2(fy, fx, R);
- stats_add(&st, specials_deg[si], frd(r, R), q16(atan2(y, x)));
+ stats_add(&st, specials_deg[si], frd(r, R), q16(atan2((double)fy, (double)fx)), M_PI);
}
acc_row("atan2", &st, "65536x5 radii; asin/acos+hypot_fast8");
}
@@ -1970,10 +2081,11 @@ static void section_accuracy_table(void) {
stats_t st; stats_reset(&st);
for (int i = -10000; i <= 10000; i++) {
double x = i / 1000.0;
- s32 fr = (s32)(x * scale);
+ s32 fr = tofix(x, R);
+ double actual_x = frd(fr, R);
s32 r = FR_atan(fr, (u16)R, (u16)R);
- double ref = atan(x);
- stats_add(&st, x, frd(r, R), q16(ref));
+ double ref = atan(actual_x);
+ stats_add(&st, actual_x, frd(r, R), q16(ref), M_PI / 2.0);
}
acc_row("atan", &st, "20001-pt full sweep [-10,10]; via FR_atan2");
}
@@ -1983,16 +2095,18 @@ static void section_accuracy_table(void) {
stats_t st; stats_reset(&st);
double inputs[] = {0.0001, 0.25, 0.5, 1, 2, 3, 4, 7, 9, 16, 25, 100, 1024, 10000, 32000};
for (int i = 0; i < (int)(sizeof(inputs)/sizeof(inputs[0])); i++) {
- s32 fr = (s32)(inputs[i] * scale);
+ s32 fr = tofix(inputs[i], R);
+ double actual_x = frd(fr, R);
s32 r = FR_sqrt(fr, R);
- stats_add(&st, inputs[i], frd(r, R), q16(sqrt(inputs[i])));
+ stats_add(&st, actual_x, frd(r, R), q16(sqrt(actual_x)), sqrt(32000.0));
}
/* Fine sweep */
for (int i = 1; i <= 1000; i++) {
double x = i * 10.0;
- s32 fr = (s32)(x * scale);
+ s32 fr = tofix(x, R);
+ double actual_x = frd(fr, R);
s32 r = FR_sqrt(fr, R);
- stats_add(&st, x, frd(r, R), q16(sqrt(x)));
+ stats_add(&st, actual_x, frd(r, R), q16(sqrt(actual_x)), sqrt(32000.0));
}
acc_row("sqrt", &st, "Round-to-nearest");
}
@@ -2002,18 +2116,20 @@ static void section_accuracy_table(void) {
stats_t st; stats_reset(&st);
/* Integer inputs — stay within s32 range at radix 16 (max ~32767) */
for (int v = 1; v <= 32000; v += (v < 100 ? 1 : v / 10)) {
- s32 fr = (s32)((double)v * scale);
+ s32 fr = tofix((double)v, R);
if (fr <= 0) continue;
+ double actual_v = frd(fr, R);
s32 r = FR_log2(fr, (u16)R, (u16)R);
- stats_add(&st, (double)v, frd(r, R), q16(log2((double)v)));
+ stats_add(&st, actual_v, frd(r, R), q16(log2(actual_v)), log2(32000.0));
}
/* Fractional sweep 0.125 .. 1.0 */
for (int i = 1; i <= 100; i++) {
double x = 0.125 + (0.875 * i / 100.0);
- s32 fr = (s32)(x * scale);
+ s32 fr = tofix(x, R);
if (fr <= 0) continue;
+ double actual_x = frd(fr, R);
s32 r = FR_log2(fr, (u16)R, (u16)R);
- stats_add(&st, x, frd(r, R), q16(log2(x)));
+ stats_add(&st, actual_x, frd(r, R), q16(log2(actual_x)), log2(32000.0));
}
acc_row("log2", &st, "65-entry mantissa table");
}
@@ -2023,10 +2139,11 @@ static void section_accuracy_table(void) {
stats_t st; stats_reset(&st);
for (int i = -800; i <= 800; i++) {
double x = i / 100.0;
- s32 fr = (s32)(x * scale);
+ s32 fr = tofix(x, R);
+ double actual_x = frd(fr, R);
s32 r = FR_pow2(fr, R);
- double ref = pow(2.0, x);
- stats_add(&st, x, frd(r, R), q16(ref));
+ double ref = pow(2.0, actual_x);
+ stats_add(&st, actual_x, frd(r, R), q16(ref), pow(2.0, 8.0));
}
acc_row("pow2", &st, "65-entry fraction table");
}
@@ -2036,14 +2153,15 @@ static void section_accuracy_table(void) {
stats_t st; stats_reset(&st);
double inputs[] = {0.125, 0.25, 0.5, 1, 2, M_E, 3, 4, 5, 7, 8, 10, 20, 50, 100, 1000};
for (int i = 0; i < (int)(sizeof(inputs)/sizeof(inputs[0])); i++) {
- s32 fr = (s32)(inputs[i] * scale);
+ s32 fr = tofix(inputs[i], R);
if (fr <= 0) continue;
+ double actual_x = frd(fr, R);
s32 r = FR_ln(fr, R, R);
- double ref = log(inputs[i]);
- stats_add(&st, inputs[i], frd(r, R), q16(ref));
+ double ref = log(actual_x);
+ stats_add(&st, actual_x, frd(r, R), q16(ref), log(32000.0));
r = FR_log10(fr, R, R);
- ref = log10(inputs[i]);
- stats_add(&st, inputs[i], frd(r, R), q16(ref));
+ ref = log10(actual_x);
+ stats_add(&st, actual_x, frd(r, R), q16(ref), log10(32000.0));
}
acc_row("ln, log10", &st, "Via FR_MULK28 from log2");
}
@@ -2053,11 +2171,12 @@ static void section_accuracy_table(void) {
stats_t st; stats_reset(&st);
for (int i = -400; i <= 400; i++) {
double x = i / 100.0;
- s32 fr = (s32)(x * scale);
+ s32 fr = tofix(x, R);
+ double actual_x = frd(fr, R);
s32 r = FR_EXP(fr, R);
- double ref = exp(x);
+ double ref = exp(actual_x);
if (ref > 32000.0 || ref < 1e-6) continue; /* skip overflow/underflow */
- stats_add(&st, x, frd(r, R), q16(ref));
+ stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0);
}
acc_row("exp", &st, "FR_MULK28 + FR_pow2");
}
@@ -2067,11 +2186,12 @@ static void section_accuracy_table(void) {
stats_t st; stats_reset(&st);
for (int i = -400; i <= 400; i++) {
double x = i / 100.0;
- s32 fr = (s32)(x * scale);
+ s32 fr = tofix(x, R);
+ double actual_x = frd(fr, R);
s32 r = FR_EXP_FAST(fr, R);
- double ref = exp(x);
+ double ref = exp(actual_x);
if (ref > 32000.0 || ref < 1e-6) continue;
- stats_add(&st, x, frd(r, R), q16(ref));
+ stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0);
}
acc_row("exp_fast", &st, "Shift-only scaling");
}
@@ -2081,11 +2201,12 @@ static void section_accuracy_table(void) {
stats_t st; stats_reset(&st);
for (int i = -200; i <= 200; i++) {
double x = i / 100.0;
- s32 fr = (s32)(x * scale);
+ s32 fr = tofix(x, R);
+ double actual_x = frd(fr, R);
s32 r = FR_POW10(fr, R);
- double ref = pow(10.0, x);
+ double ref = pow(10.0, actual_x);
if (ref > 32000.0 || ref < 1e-6) continue;
- stats_add(&st, x, frd(r, R), q16(ref));
+ stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0);
}
acc_row("pow10", &st, "FR_MULK28 + FR_pow2");
}
@@ -2095,11 +2216,12 @@ static void section_accuracy_table(void) {
stats_t st; stats_reset(&st);
for (int i = -200; i <= 200; i++) {
double x = i / 100.0;
- s32 fr = (s32)(x * scale);
+ s32 fr = tofix(x, R);
+ double actual_x = frd(fr, R);
s32 r = FR_POW10_FAST(fr, R);
- double ref = pow(10.0, x);
+ double ref = pow(10.0, actual_x);
if (ref > 32000.0 || ref < 1e-6) continue;
- stats_add(&st, x, frd(r, R), q16(ref));
+ stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0);
}
acc_row("pow10_fast", &st, "Shift-only scaling");
}
@@ -2112,11 +2234,12 @@ static void section_accuracy_table(void) {
{1,1},{0.5,0.5},{100,100},{1000,1},{1,1000}
};
for (int i = 0; i < (int)(sizeof(cases)/sizeof(cases[0])); i++) {
- s32 fx = (s32)(cases[i].x * scale);
- s32 fy = (s32)(cases[i].y * scale);
+ s32 fx = tofix(cases[i].x, R);
+ s32 fy = tofix(cases[i].y, R);
+ double actual_x = frd(fx, R), actual_y = frd(fy, R);
s32 r = FR_hypot(fx, fy, R);
- double ref = hypot(cases[i].x, cases[i].y);
- stats_add(&st, ref, frd(r, R), q16(ref));
+ double ref = hypot(actual_x, actual_y);
+ stats_add(&st, ref, frd(r, R), q16(ref), hypot(1000.0, 1000.0));
}
acc_row("hypot (exact)", &st, "64-bit intermediate");
}
@@ -2129,17 +2252,18 @@ static void section_accuracy_table(void) {
{100,100},{1000,1},{1,1000},{7,24},{20,21}
};
for (int i = 0; i < (int)(sizeof(cases)/sizeof(cases[0])); i++) {
- s32 fx = (s32)(cases[i].x * scale);
- s32 fy = (s32)(cases[i].y * scale);
+ s32 fx = tofix(cases[i].x, R);
+ s32 fy = tofix(cases[i].y, R);
+ double actual_x = frd(fx, R), actual_y = frd(fy, R);
s32 r = FR_hypot_fast8(fx, fy);
- double ref = hypot(cases[i].x, cases[i].y);
- if (ref > 0) stats_add(&st, ref, frd(r, R), q16(ref));
+ double ref = hypot(actual_x, actual_y);
+ if (ref > 0) stats_add(&st, ref, frd(r, R), q16(ref), hypot(1000.0, 1000.0));
}
acc_row("hypot_fast8 (8-seg)", &st, "Shift-only, no multiply");
}
printf("\n");
- printf("\n");
+ printf("\n*Relative error; reference clamped to 1%% of full-scale output.\n\n");
/* ── Test-only rows (not library functions — conversion & pipeline checks) ── */
md_h3("14.0.1 Conversion & pipeline accuracy (test-only)");
@@ -2151,7 +2275,7 @@ static void section_accuracy_table(void) {
stats_t &st = st_rad2bam;
for (int i = 0; i < 65536; i++) {
double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0);
- s32 rad_fp = (s32)(angle * scale);
+ s32 rad_fp = tofix(angle, R);
u16 got = fr_rad_to_bam(rad_fp, 16);
/* Exact BAM: wrap to u16 */
double exact_bam_d = angle * 65536.0 / (2.0 * M_PI);
@@ -2160,7 +2284,7 @@ static void section_accuracy_table(void) {
/* Feed stats as degrees so the error is interpretable */
double got_deg = got * (360.0 / 65536.0);
double exp_deg = expected * (360.0 / 65536.0);
- stats_add(&st, angle, got_deg, exp_deg);
+ stats_add(&st, angle, got_deg, exp_deg, 360.0);
}
{
char note[128];
@@ -2176,7 +2300,7 @@ static void section_accuracy_table(void) {
stats_t &st = st_deg2bam;
for (int i = 0; i < 65536; i++) {
double deg = -360.0 + (720.0 * i / 65536.0);
- s32 deg_fp = (s32)(deg * scale);
+ s32 deg_fp = tofix(deg, R);
u16 got = fr_deg_to_bam(deg_fp, 16);
/* Exact BAM: wrap to u16 */
double exact_bam_d = deg * 65536.0 / 360.0;
@@ -2184,7 +2308,7 @@ static void section_accuracy_table(void) {
u16 expected = (u16)(exact_bam_s & 0xFFFF);
double got_deg = got * (360.0 / 65536.0);
double exp_deg = expected * (360.0 / 65536.0);
- stats_add(&st, deg, got_deg, exp_deg);
+ stats_add(&st, deg, got_deg, exp_deg, 360.0);
}
{
char note[128];
@@ -2200,10 +2324,10 @@ static void section_accuracy_table(void) {
stats_t &st = st_sincos_deg_s32;
for (int deg = -360; deg <= 360; deg++) {
double rad = deg * M_PI / 180.0;
- stats_add(&st, (double)deg, frd(fr_sin_deg(deg), FR_TRIG_OUT_PREC), q16(sin(rad)));
- stats_add(&st, (double)deg, frd(fr_cos_deg(deg), FR_TRIG_OUT_PREC), q16(cos(rad)));
+ stats_add(&st, (double)deg, frd(FR_SinI(deg), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0);
+ stats_add(&st, (double)deg, frd(FR_CosI(deg), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0);
}
- acc_row("sin/cos (int deg)", &st, "fr_sin_deg/fr_cos_deg ±360° integer degrees");
+ acc_row("sin/cos (int deg)", &st, "FR_SinI/FR_CosI ±360° integer degrees");
}
/* --- tan via integer degrees ±360° --- */
@@ -2211,7 +2335,7 @@ static void section_accuracy_table(void) {
stats_t &st = st_tan_deg_s32;
for (int deg = -360; deg <= 360; deg++) {
double rad = deg * M_PI / 180.0;
- stats_add(&st, (double)deg, frd(FR_TanI((s16)deg), FR_TRIG_OUT_PREC), q16(tan_ref(rad)));
+ stats_add(&st, (double)deg, frd(FR_TanI((s16)deg), FR_TRIG_OUT_PREC), q16(tan_ref(rad)), TAN_CLAMP);
}
acc_row("tan (int deg)", &st, "FR_TanI ±360° full; sat at poles");
}
@@ -2223,14 +2347,14 @@ static void section_accuracy_table(void) {
stats_t st; stats_reset(&st);
for (int i = 0; i < 65536; i++) {
double angle = -M_PI + (2.0 * M_PI * i / 65536.0);
- s32 rad_fp = (s32)(angle * scale);
+ s32 rad_fp = tofix(angle, R);
s32 raw = FR_RAD2BAM(rad_fp);
u16 got = (u16)((raw + (1 << 15)) >> 16);
double exact_d = angle * 65536.0 / (2.0 * M_PI);
u16 expected = (u16)((s32)floor(exact_d + 0.5) & 0xFFFF);
double got_deg = got * (360.0 / 65536.0);
double exp_deg = expected * (360.0 / 65536.0);
- stats_add(&st, angle, got_deg, exp_deg);
+ stats_add(&st, angle, got_deg, exp_deg, 360.0);
}
acc_row("FR_RAD2BAM macro", &st, "Shift-approx ±π at r16; overflows beyond ±4 rad");
}
@@ -2240,14 +2364,14 @@ static void section_accuracy_table(void) {
stats_t st; stats_reset(&st);
const u16 radix = 7;
for (int i = -23040; i <= 23040; i++) { /* ±180° at r7 = ±23040 */
- double deg = (double)i / (1 << radix);
+ double deg = (double)i / 128.0;
s32 raw = FR_DEG2BAM((s32)i);
u16 got = (u16)((raw + (1 << (radix - 1))) >> radix);
double exact_d = deg * 65536.0 / 360.0;
u16 expected = (u16)((s32)floor(exact_d + 0.5) & 0xFFFF);
double got_deg = got * (360.0 / 65536.0);
double exp_deg = expected * (360.0 / 65536.0);
- stats_add(&st, deg, got_deg, exp_deg);
+ stats_add(&st, deg, got_deg, exp_deg, 360.0);
}
acc_row("FR_DEG2BAM macro", &st, "Shift-approx ±180° at r7; overflows beyond ±256°");
}
@@ -2261,7 +2385,7 @@ static void section_accuracy_table(void) {
s32 rad_fp = FR_BAM2RAD(bam_r16);
double got_rad = frd(rad_fp, 16);
double exp_rad = (double)i * 2.0 * M_PI / 65536.0;
- stats_add(&st, (double)i, got_rad, exp_rad);
+ stats_add(&st, (double)i, got_rad, exp_rad, 2.0 * M_PI);
}
acc_row("FR_BAM2RAD macro", &st, "BAM→rad r16 full (0..32767; <<16 overflow above)");
}
@@ -2275,7 +2399,7 @@ static void section_accuracy_table(void) {
s32 deg_fp = FR_BAM2DEG(bam_r16);
double got_deg = frd(deg_fp, 16);
double exp_deg = (double)i * 360.0 / 65536.0;
- stats_add(&st, (double)i, got_deg, exp_deg);
+ stats_add(&st, (double)i, got_deg, exp_deg, 360.0);
}
acc_row("FR_BAM2DEG macro", &st, "BAM→deg r16 full (0..32767; <<16 overflow above)");
}
@@ -2285,11 +2409,11 @@ static void section_accuracy_table(void) {
stats_t st; stats_reset(&st);
for (int i = 0; i < 65536; i++) {
double deg = -360.0 + (720.0 * i / 65536.0);
- s32 deg_fp = (s32)(deg * scale);
+ s32 deg_fp = tofix(deg, R);
s32 rad_fp = FR_DEG2RAD(deg_fp);
double got_rad = frd(rad_fp, 16);
double exp_rad = deg * M_PI / 180.0;
- stats_add(&st, deg, got_rad, exp_rad);
+ stats_add(&st, deg, got_rad, exp_rad, 2.0 * M_PI);
}
acc_row("FR_DEG2RAD macro", &st, "65536-pt ±360° r16 full");
}
@@ -2299,11 +2423,11 @@ static void section_accuracy_table(void) {
stats_t st; stats_reset(&st);
for (int i = 0; i < 65536; i++) {
double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0);
- s32 rad_fp = (s32)(angle * scale);
+ s32 rad_fp = tofix(angle, R);
s32 deg_fp = FR_RAD2DEG(rad_fp);
double got_deg = frd(deg_fp, 16);
double exp_deg = angle * 180.0 / M_PI;
- stats_add(&st, angle, got_deg, exp_deg);
+ stats_add(&st, angle, got_deg, exp_deg, 360.0);
}
acc_row("FR_RAD2DEG macro", &st, "65536-pt ±2π r16 full");
}
@@ -2337,6 +2461,325 @@ static void section_accuracy_table(void) {
s->max_pct_err);
}
printf("\n");
+
+ /* ── 14.3 Per-function trig sweep table ────────────────────────────
+ * One row per public entry point. Each function is swept
+ * independently over its full domain so that peak abs / pct errors
+ * are attributable to a single function, not a combined aggregate.
+ *
+ * Peak pct err is raw |err|/|expected|*100 — no clamping. Near
+ * zero crossings (sin≈0, cos≈0, asin(0)≈0) the denominator is
+ * tiny and pct blows up even when abs err is sub-LSB. The Notes
+ * column flags these rows. Use Peak abs err and Mean abs err to
+ * judge accuracy at zero crossings; use Peak pct err elsewhere.
+ */
+ md_h3("14.2 Neighborhoods (peak error ±10 samples)");
+
+ /* fr_sin radian at i=0 (-360°) — zero crossing neighborhood */
+ neighborhood("fr_sin radian @ -360 deg (i=0)", 3, 0, 10, 131072,
+ -2.0 * M_PI, 2.0 * M_PI);
+
+ md_h3("14.3 Per-function trig sweep");
+
+ printf("| Function | Input | Range start | Range end | Points | Increment | "
+ "Peak abs err | @abs_err | Peak pct err | @pct_err | Expected | Got | Mean abs err | Notes |\n");
+ printf("|---|---|---:|---:|---:|---|---:|---:|---:|---:|---:|---:|---:|---|\n");
+
+ /* Helper: print one row of the per-function table */
+ #define SWEEP_ROW(name, sig, rlo, rhi, pts, step, st, note) \
+ printf("| %s | %s | %s | %s | %d | %s | %f | %.4f | %.4f%% | %.4f | %f | %f | %f | %s |\n", \
+ name, sig, rlo, rhi, pts, step, \
+ (st).max_abs_err, (st).worst_input, (st).max_pct_err, \
+ (st).worst_pct_input, (st).worst_pct_expected, (st).worst_pct_actual, \
+ stats_mean(&(st)), note)
+
+ /* fr_sin_bam */
+ {
+ stats_t st; stats_reset(&st);
+ for (int b = 0; b < 65536; b++) {
+ u16 bam = (u16)b;
+ double rad = bam * 2.0 * M_PI / 65536.0;
+ double deg = bam * 360.0 / 65536.0;
+ stats_add(&st, deg, frd(fr_sin_bam(bam), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0);
+ }
+ SWEEP_ROW("fr_sin_bam", "(u16 bam)", "0", "360", 65536, "0.0055 deg", st, "");
+ }
+ /* fr_cos_bam */
+ {
+ stats_t st; stats_reset(&st);
+ for (int b = 0; b < 65536; b++) {
+ u16 bam = (u16)b;
+ double rad = bam * 2.0 * M_PI / 65536.0;
+ double deg = bam * 360.0 / 65536.0;
+ stats_add(&st, deg, frd(fr_cos_bam(bam), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0);
+ }
+ SWEEP_ROW("fr_cos_bam", "(u16 bam)", "0", "360", 65536, "0.0055 deg", st, "");
+ }
+ /* fr_tan_bam */
+ {
+ stats_t st; stats_reset(&st);
+ for (int b = 0; b < 65536; b++) {
+ u16 bam = (u16)b;
+ double rad = bam * 2.0 * M_PI / 65536.0;
+ double deg = bam * 360.0 / 65536.0;
+ double ref;
+ if (bam == 16384) ref = TAN_CLAMP;
+ else if (bam == 49152) ref = -TAN_CLAMP;
+ else ref = q16(tan_ref(rad));
+ stats_add(&st, deg, frd(fr_tan_bam(bam), FR_TRIG_OUT_PREC), ref, TAN_CLAMP);
+ }
+ SWEEP_ROW("fr_tan_bam", "(u16 bam)", "0", "360", 65536, "0.0055 deg", st, "pole clamped");
+ }
+ /* fr_sin (radian) */
+ {
+ stats_t st; stats_reset(&st);
+ const int N2 = 131072;
+ for (int i = 0; i < N2; i++) {
+ double angle = -2.0 * M_PI + (4.0 * M_PI * i / (double)N2);
+ s32 rad_fp = tofix(angle, 16);
+ double actual_angle = frd(rad_fp, 16);
+ double deg = actual_angle * 180.0 / M_PI;
+ stats_add(&st, deg, frd(fr_sin(rad_fp, 16), FR_TRIG_OUT_PREC), q16(sin(actual_angle)), 1.0);
+ }
+ SWEEP_ROW("fr_sin", "(s32 rad, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "near-π small-angle bypass");
+ }
+ /* fr_cos (radian) */
+ {
+ stats_t st; stats_reset(&st);
+ const int N2 = 131072;
+ for (int i = 0; i < N2; i++) {
+ double angle = -2.0 * M_PI + (4.0 * M_PI * i / (double)N2);
+ s32 rad_fp = tofix(angle, 16);
+ double actual_angle = frd(rad_fp, 16);
+ double deg = actual_angle * 180.0 / M_PI;
+ stats_add(&st, deg, frd(fr_cos(rad_fp, 16), FR_TRIG_OUT_PREC), q16(cos(actual_angle)), 1.0);
+ }
+ SWEEP_ROW("fr_cos", "(s32 rad, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "");
+ }
+ /* fr_tan (radian) */
+ {
+ stats_t st; stats_reset(&st);
+ const int N2 = 131072;
+ for (int i = 0; i < N2; i++) {
+ double angle = -2.0 * M_PI + (4.0 * M_PI * i / (double)N2);
+ s32 rad_fp = tofix(angle, 16);
+ double actual_angle = frd(rad_fp, 16);
+ double deg = actual_angle * 180.0 / M_PI;
+ stats_add(&st, deg, frd(fr_tan(rad_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(actual_angle)), TAN_CLAMP);
+ }
+ SWEEP_ROW("fr_tan", "(s32 rad, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "near-π bypass; s64 lerp near poles");
+ }
+ /* FR_SinI */
+ {
+ stats_t st; stats_reset(&st);
+ for (int d = -360; d <= 360; d++) {
+ double rad = d * M_PI / 180.0;
+ stats_add(&st, (double)d, frd(FR_SinI(d), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0);
+ }
+ SWEEP_ROW("FR_SinI", "(s16 deg)", "-360", "+360", 721, "1 deg", st, "");
+ }
+ /* FR_CosI */
+ {
+ stats_t st; stats_reset(&st);
+ for (int d = -360; d <= 360; d++) {
+ double rad = d * M_PI / 180.0;
+ stats_add(&st, (double)d, frd(FR_CosI(d), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0);
+ }
+ SWEEP_ROW("FR_CosI", "(s16 deg)", "-360", "+360", 721, "1 deg", st, "");
+ }
+ /* FR_TanI */
+ {
+ stats_t st; stats_reset(&st);
+ for (int d = -360; d <= 360; d++) {
+ double rad = d * M_PI / 180.0;
+ double ref;
+ if (d % 180 == 90 || d % 180 == -90)
+ ref = (d > 0) ? TAN_CLAMP : -TAN_CLAMP;
+ else
+ ref = q16(tan_ref(rad));
+ stats_add(&st, (double)d, frd(FR_TanI((s16)d), FR_TRIG_OUT_PREC), ref, TAN_CLAMP);
+ }
+ SWEEP_ROW("FR_TanI", "(s16 deg)", "-360", "+360", 721, "1 deg", st, "pole clamped");
+ }
+ /* fr_sin_deg (fixed-radix degrees, radix 16) */
+ {
+ stats_t st; stats_reset(&st);
+ const int N2 = 131072;
+ for (int i = 0; i < N2; i++) {
+ double deg = -360.0 + 720.0 * i / (double)N2;
+ s32 deg_fp = tofix(deg, 16);
+ double actual_deg = frd(deg_fp, 16);
+ double rad = actual_deg * M_PI / 180.0;
+ stats_add(&st, actual_deg, frd(FR_Sin(deg_fp, 16), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0);
+ }
+ SWEEP_ROW("fr_sin_deg", "(s32 deg, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "pct peak at sin=0 crossing");
+ }
+ /* fr_cos_deg (fixed-radix degrees, radix 16) */
+ {
+ stats_t st; stats_reset(&st);
+ const int N2 = 131072;
+ for (int i = 0; i < N2; i++) {
+ double deg = -360.0 + 720.0 * i / (double)N2;
+ s32 deg_fp = tofix(deg, 16);
+ double actual_deg = frd(deg_fp, 16);
+ double rad = actual_deg * M_PI / 180.0;
+ stats_add(&st, actual_deg, frd(FR_Cos(deg_fp, 16), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0);
+ }
+ SWEEP_ROW("fr_cos_deg", "(s32 deg, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "near-90/270 small-angle bypass");
+ }
+ /* fr_tan_deg (fixed-radix degrees, radix 16) */
+ {
+ stats_t st; stats_reset(&st);
+ const int N2 = 131072;
+ for (int i = 0; i < N2; i++) {
+ double deg = -360.0 + 720.0 * i / (double)N2;
+ s32 deg_fp = tofix(deg, 16);
+ double actual_deg = frd(deg_fp, 16);
+ double rad = actual_deg * M_PI / 180.0;
+ stats_add(&st, actual_deg, frd(FR_Tan(deg_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(rad)), TAN_CLAMP);
+ }
+ SWEEP_ROW("fr_tan_deg", "(s32 deg, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "pct peak near tan pole");
+ }
+
+ /* --- Inverse Trig --- */
+
+ /* FR_acos */
+ {
+ stats_t st; stats_reset(&st);
+ const int N = 65537;
+ for (int i = 0; i < N; i++) {
+ double xd = -1.0 + 2.0 * i / (double)(N - 1);
+ s32 fr = tofix(xd, 15);
+ double actual_xd = frd(fr, 15);
+ s32 rad = FR_acos(fr, 15, 16);
+ stats_add(&st, actual_xd, frd(rad, 16), q16(acos(actual_xd)), M_PI);
+ }
+ SWEEP_ROW("FR_acos", "(s32,u16 15,u16 16)", "-1.0", "+1.0", N, "3.05e-5", st, "r15 in, r16 out");
+ }
+ /* FR_asin */
+ {
+ stats_t st; stats_reset(&st);
+ const int N = 65537;
+ for (int i = 0; i < N; i++) {
+ double xd = -1.0 + 2.0 * i / (double)(N - 1);
+ s32 fr = tofix(xd, 15);
+ double actual_xd = frd(fr, 15);
+ s32 rad = FR_asin(fr, 15, 16);
+ stats_add(&st, actual_xd, frd(rad, 16), q16(asin(actual_xd)), M_PI);
+ }
+ SWEEP_ROW("FR_asin", "(s32,u16 15,u16 16)", "-1.0", "+1.0", N, "3.05e-5", st, "r15 in, r16 out; pct peak at asin(0)=0");
+ }
+ /* FR_atan */
+ {
+ stats_t st; stats_reset(&st);
+ const int N = 131072;
+ for (int i = 0; i < N; i++) {
+ double xd = -10.0 + 20.0 * i / (double)N;
+ s32 fr = tofix(xd, 16);
+ double actual_xd = frd(fr, 16);
+ s32 rad = FR_atan(fr, 16, 16);
+ stats_add(&st, actual_xd, frd(rad, 16), q16(atan(actual_xd)), M_PI / 2.0);
+ }
+ SWEEP_ROW("FR_atan", "(s32,u16 16,u16 16)", "-10.0", "+10.0", N, "1.53e-4", st, "r16 in/out");
+ }
+ /* FR_atan2 — unit circle sweep */
+ {
+ stats_t st; stats_reset(&st);
+ const int N = 65536;
+ for (int i = 0; i < N; i++) {
+ double angle = -M_PI + 2.0 * M_PI * i / (double)N;
+ double deg = angle * 180.0 / M_PI;
+ s32 x = tofix(cos(angle), 15);
+ s32 y = tofix(sin(angle), 15);
+ s32 rad = FR_atan2(y, x, 16);
+ stats_add(&st, deg, frd(rad, 16), q16(atan2((double)y, (double)x)), M_PI);
+ }
+ SWEEP_ROW("FR_atan2", "(s32 y,s32 x,u16 16)", "-180", "+180", N, "0.0055 deg", st, "unit circle r15");
+ }
+
+ /* --- Log / Exp --- */
+
+ /* FR_log2 */
+ {
+ stats_t st; stats_reset(&st);
+ const int N = 65536;
+ for (int i = 1; i <= N; i++) {
+ double xd = 0.01 + (256.0 - 0.01) * i / (double)N;
+ s32 fr = tofix(xd, 16);
+ double actual_xd = frd(fr, 16);
+ s32 r = FR_log2(fr, 16, 16);
+ stats_add(&st, actual_xd, frd(r, 16), q16(log2(actual_xd)), log2(32000.0));
+ }
+ SWEEP_ROW("FR_log2", "(s32,u16 16,u16 16)", "0.01", "256", N, "0.0039", st, "r16 in/out");
+ }
+ /* FR_ln */
+ {
+ stats_t st; stats_reset(&st);
+ const int N = 65536;
+ for (int i = 1; i <= N; i++) {
+ double xd = 0.01 + (256.0 - 0.01) * i / (double)N;
+ s32 fr = tofix(xd, 16);
+ double actual_xd = frd(fr, 16);
+ s32 r = FR_ln(fr, 16, 16);
+ stats_add(&st, actual_xd, frd(r, 16), q16(log(actual_xd)), log(32000.0));
+ }
+ SWEEP_ROW("FR_ln", "(s32,u16 16,u16 16)", "0.01", "256", N, "0.0039", st, "r16 in/out");
+ }
+ /* FR_log10 */
+ {
+ stats_t st; stats_reset(&st);
+ const int N = 65536;
+ for (int i = 1; i <= N; i++) {
+ double xd = 0.01 + (256.0 - 0.01) * i / (double)N;
+ s32 fr = tofix(xd, 16);
+ double actual_xd = frd(fr, 16);
+ s32 r = FR_log10(fr, 16, 16);
+ stats_add(&st, actual_xd, frd(r, 16), q16(log10(actual_xd)), log10(32000.0));
+ }
+ SWEEP_ROW("FR_log10", "(s32,u16 16,u16 16)", "0.01", "256", N, "0.0039", st, "r16 in/out");
+ }
+ /* FR_pow2 */
+ {
+ stats_t st; stats_reset(&st);
+ const int N = 65536;
+ for (int i = 0; i < N; i++) {
+ double xd = -8.0 + 16.0 * i / (double)N;
+ s32 fr = tofix(xd, 16);
+ double actual_xd = frd(fr, 16);
+ s32 r = FR_pow2(fr, 16);
+ stats_add(&st, actual_xd, frd(r, 16), q16(pow(2.0, actual_xd)), pow(2.0, 8.0));
+ }
+ SWEEP_ROW("FR_pow2", "(s32,u16 16)", "-8.0", "+8.0", N, "2.44e-4", st, "r16 in/out");
+ }
+ /* FR_EXP (macro wrapping FR_pow2) */
+ {
+ stats_t st; stats_reset(&st);
+ const int N = 65536;
+ for (int i = 0; i < N; i++) {
+ double xd = -5.0 + 15.0 * i / (double)N;
+ s32 fr = tofix(xd, 16);
+ double actual_xd = frd(fr, 16);
+ s32 r = FR_EXP(fr, 16);
+ stats_add(&st, actual_xd, frd(r, 16), q16(exp(actual_xd)), 32000.0);
+ }
+ SWEEP_ROW("FR_EXP", "(s32,u16 16)", "-5.0", "+10.0", N, "2.29e-4", st, "macro, wraps FR_pow2");
+ }
+ /* FR_POW10 (macro wrapping FR_pow2) */
+ {
+ stats_t st; stats_reset(&st);
+ const int N = 65536;
+ for (int i = 0; i < N; i++) {
+ double xd = -2.0 + 6.0 * i / (double)N;
+ s32 fr = tofix(xd, 16);
+ double actual_xd = frd(fr, 16);
+ s32 r = FR_POW10(fr, 16);
+ stats_add(&st, actual_xd, frd(r, 16), q16(pow(10.0, actual_xd)), 32000.0);
+ }
+ SWEEP_ROW("FR_POW10", "(s32,u16 16)", "-2.0", "+4.0", N, "9.15e-5", st, "macro, wraps FR_pow2");
+ }
+
+ #undef SWEEP_ROW
+ printf("\n");
}
int main(void) {
@@ -2362,4 +2805,4 @@ int main(void) {
section_accuracy_table();
return 0;
-}
+}
\ No newline at end of file
diff --git a/tools/README.md b/tools/README.md
new file mode 100644
index 0000000..29d8ac7
--- /dev/null
+++ b/tools/README.md
@@ -0,0 +1,131 @@
+# FR_Math Tools
+
+Diagnostic and code-generation utilities for the FR_Math library.
+
+## trig_neighborhood
+
+Sweep any math function over a range and print a neighborhood table showing
+raw output, expected reference, absolute error, and percent error.
+
+**Build:** `make tools`
+
+**Usage:**
+```
+trig_neighborhood <func> <center> <half> [options]
+```
+
+### Supported functions (25)
+
+| Category | Functions |
+|---|---|
+| Trig (degrees) | `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam`, `fr_sin`, `fr_cos`, `fr_tan`, `FR_SinI`, `FR_CosI`, `FR_TanI`, `fr_sin_deg`, `fr_cos_deg`, `fr_tan_deg` |
+| Inverse trig | `FR_acos`, `FR_asin`, `FR_atan`, `FR_atan2` |
+| Logarithmic | `FR_log2`, `FR_ln`, `FR_log10` |
+| Exponential | `FR_pow2`, `FR_EXP`, `FR_POW10` |
+| Other | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` |
+
+### Options
+
+| Option | Description | Default |
+|---|---|---|
+| `--inc <val>` | Increment per sample | function-dependent |
+| `--fmt md\|csv\|ascii` | Output format | `md` |
+| `--radix <n>` | Input radix for fixed-point | 16 |
+| `--out_radix <n>` | Output radix (inv trig, log) | 16 |
+| `--y <val>` | Fixed y for hypot functions | 0.0 |
+
+### Default increments
+
+- Trig + FR_atan2: `360/65536` (~0.0055 degrees)
+- FR_acos, FR_asin: `1/32768` (~3.05e-5)
+- All others: `1/65536` (~1.53e-5)
+
+### Examples
+
+```bash
+# Cosine near -90 degrees
+build/trig_neighborhood fr_cos -90 15
+
+# Sine sweep in CSV format
+build/trig_neighborhood fr_sin -360 10 --fmt csv
+
+# Tangent near pole
+build/trig_neighborhood fr_tan 89.5 20 --inc 0.01
+
+# Arcsine near zero
+build/trig_neighborhood FR_asin 0.0001 15 --inc 3.05e-5 --radix 15
+
+# Log2 near 1.0
+build/trig_neighborhood FR_log2 1.0 15 --inc 0.01
+
+# Atan2 near 90 degrees
+build/trig_neighborhood FR_atan2 90 15
+
+# Hypot with y=50
+build/trig_neighborhood FR_hypot_fast8 100 15 --y 50 --radix 8
+```
+
+---
+
+## coef-gen.py
+
+Python script for generating power-of-two coefficient approximations. Given a
+target floating-point value, searches for combinations of `+/- 2^(-k)` terms
+that best approximate the value using only bit-shifts and adds.
+
+**Usage:** `python3 tools/coef-gen.py`
+
+---
+
+## fr_coef-gen.cpp
+
+C++ coefficient generator for 32-bit host. Similar purpose to `coef-gen.py`
+but runs natively and can be used for brute-force search over larger term
+counts.
+
+**Build:** `g++ -O2 tools/fr_coef-gen.cpp -o build/fr_coef-gen`
+
+---
+
+## gen_pow2_table.py
+
+Generates the `gFR_POW2_FRAC_TAB[65]` lookup table used by `FR_pow2()`.
+Output is a C array suitable for inclusion in FR_math.c.
+
+**Usage:** `python3 tools/gen_pow2_table.py`
+
+---
+
+## gen_radix28_constants.py
+
+Generates radix-28 constants used by FR_EXP, FR_ln, FR_log10 for base
+conversion (e.g., `FR_kLOG2E_28`, `FR_kLOG2_10_28`).
+
+**Usage:** `python3 tools/gen_radix28_constants.py`
+
+---
+
+## check_published_versions.sh
+
+Verifies that published version tags match the version defined in
+`FR_math.h` (`FR_MATH_VERSION_HEX`). Used in CI/release workflows.
+
+**Usage:** `bash tools/check_published_versions.sh`
+
+---
+
+## make_release.sh
+
+Release automation script. Bumps version, tags, and prepares release
+artifacts.
+
+**Usage:** `bash tools/make_release.sh`
+
+---
+
+## interp_analysis.html
+
+Interactive HTML/JS visualization for interpolation analysis. Open in a
+browser to explore interpolation error characteristics.
+
+**Usage:** Open `tools/interp_analysis.html` in a web browser.
diff --git a/tools/trig_neighborhood.cpp b/tools/trig_neighborhood.cpp
new file mode 100644
index 0000000..3266515
--- /dev/null
+++ b/tools/trig_neighborhood.cpp
@@ -0,0 +1,519 @@
+/*
+ * trig_neighborhood.cpp — sweep any math function over a range, print neighborhood table
+ *
+ * Usage:
+ * trig_neighborhood <func> <center> <half> [--inc <val>] [--fmt md|csv|ascii]
+ * [--radix <n>] [--out_radix <n>] [--y <val>]
+ *
+ * Trig functions:
+ * fr_sin_bam, fr_cos_bam, fr_tan_bam,
+ * fr_sin, fr_cos, fr_tan,
+ * FR_SinI, FR_CosI, FR_TanI,
+ * fr_sin_deg, fr_cos_deg, fr_tan_deg
+ *
+ * Inverse trig:
+ * FR_acos, FR_asin, FR_atan, FR_atan2
+ *
+ * Logarithmic:
+ * FR_log2, FR_ln, FR_log10
+ *
+ * Exponential:
+ * FR_pow2, FR_EXP, FR_POW10
+ *
+ * Other:
+ * FR_sqrt, FR_hypot, FR_hypot_fast8
+ *
+ * center: center value (degrees for trig/atan2, input value for others)
+ * half: number of samples on each side of center
+ * --inc: increment (default depends on function type)
+ * --fmt: output format: md (default), csv, ascii
+ * --radix: input radix for fixed-point functions (default: 16)
+ * --out_radix: output radix for inverse trig and log (default: 16)
+ * --y: fixed y value for FR_hypot / FR_hypot_fast8 (default: 0.0)
+ *
+ * Examples:
+ * trig_neighborhood fr_cos -90 15
+ * trig_neighborhood fr_sin -360 10 --fmt csv
+ * trig_neighborhood fr_tan 89.5 20 --inc 0.01
+ * trig_neighborhood fr_sin_deg 45 10 --radix 8
+ * trig_neighborhood FR_asin 0.5 15 --radix 15 --out_radix 16
+ * trig_neighborhood FR_log2 1.0 15 --inc 0.01
+ * trig_neighborhood FR_atan2 90 15
+ * trig_neighborhood FR_hypot_fast8 100 15 --y 50 --radix 8
+ *
+ * Build:
+ * make tools
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "FR_math.h"
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+/* Convert a fixed-point value carrying `p` fractional bits to double. */
+static double frd(s32 v, int p) {
+    const double one = ldexp(1.0, p);   /* 2^p: the fixed-point encoding of 1.0 */
+    return (double)v / one;
+}
+/* Round `v` to the nearest multiple of 2^-p using floor(x + 0.5). */
+static double qN(double v, int p) {
+    const double scale = ldexp(1.0, p);
+    const double nearest = floor(v * scale + 0.5);
+    return nearest / scale;
+}
+/* Float -> fixed conversion with `p` fractional bits; rounds to nearest instead of truncating. */
+static s32 tofix(double v, int p) {
+    const double scaled = ldexp(v, p);   /* v * 2^p */
+    return (s32)floor(scaled + 0.5);
+}
+/* Saturation limit applied by tan_ref(): 0x7fffffff divided by 2^16, i.e. just under 32768. */
+static const double TAN_CLAMP = (double)0x7fffffff / 65536.0;
+/* libm tangent of `rad`, saturated to the interval [-TAN_CLAMP, +TAN_CLAMP]. */
+static double tan_ref(double rad) {
+    const double t = tan(rad);
+    const double magnitude = fabs(t);
+    /* A NaN compares false and is passed through untouched. */
+    return (magnitude > TAN_CLAMP) ? copysign(TAN_CLAMP, t) : t;
+}
+
+/*
+ * Selector for every function the tool can sweep.
+ * Enumerator order matters: is_trig() is implemented as `f <= F_TAN_DEG`,
+ * so the twelve trig entries must remain first and F_TAN_DEG must stay the
+ * last of them.
+ */
+enum Func {
+ F_SIN_BAM, F_COS_BAM, F_TAN_BAM, /* u16 BAM angle input */
+ F_SIN, F_COS, F_TAN, /* fixed-point radian input */
+ F_SINI, F_COSI, F_TANI, /* integer degree input */
+ F_SIN_DEG, F_COS_DEG, F_TAN_DEG, /* fixed-point degree input; last trig entry */
+ F_ACOS, F_ASIN, F_ATAN, F_ATAN2, /* inverse trig */
+ F_LOG2, F_LN, F_LOG10, /* logarithms */
+ F_POW2, F_EXP, F_POW10, /* exponentials */
+ F_SQRT, F_HYPOT, F_HYPOT_FAST8, /* square root and hypotenuse */
+ F_UNKNOWN /* returned by parse_func() for an unrecognised name */
+};
+
+/* Output table format selected by --fmt (markdown, CSV or plain ASCII). */
+enum Fmt { FMT_MD, FMT_CSV, FMT_ASCII };
+
+/*
+ * Map a command-line function name to its Func selector.
+ * Matching is exact and case-sensitive; an unrecognised name yields F_UNKNOWN.
+ */
+static Func parse_func(const char *s) {
+    static const struct { const char *name; Func id; } kNames[] = {
+        {"fr_sin_bam", F_SIN_BAM}, {"fr_cos_bam", F_COS_BAM}, {"fr_tan_bam", F_TAN_BAM},
+        {"fr_sin", F_SIN},         {"fr_cos", F_COS},         {"fr_tan", F_TAN},
+        {"FR_SinI", F_SINI},       {"FR_CosI", F_COSI},       {"FR_TanI", F_TANI},
+        {"fr_sin_deg", F_SIN_DEG}, {"fr_cos_deg", F_COS_DEG}, {"fr_tan_deg", F_TAN_DEG},
+        {"FR_acos", F_ACOS},       {"FR_asin", F_ASIN},       {"FR_atan", F_ATAN},
+        {"FR_atan2", F_ATAN2},
+        {"FR_log2", F_LOG2},       {"FR_ln", F_LN},           {"FR_log10", F_LOG10},
+        {"FR_pow2", F_POW2},       {"FR_EXP", F_EXP},         {"FR_POW10", F_POW10},
+        {"FR_sqrt", F_SQRT},       {"FR_hypot", F_HYPOT},     {"FR_hypot_fast8", F_HYPOT_FAST8},
+    };
+    const unsigned count = sizeof(kNames) / sizeof(kNames[0]);
+
+    for (unsigned k = 0; k < count; k++) {
+        if (strcmp(s, kNames[k].name) == 0)
+            return kNames[k].id;
+    }
+    return F_UNKNOWN;
+}
+
+/* Printable name for a Func selector; anything without a name (including F_UNKNOWN) gives "?". */
+static const char *func_name(Func f) {
+    const char *label = "?";
+
+    switch (f) {
+    case F_SIN_BAM:     label = "fr_sin_bam";     break;
+    case F_COS_BAM:     label = "fr_cos_bam";     break;
+    case F_TAN_BAM:     label = "fr_tan_bam";     break;
+    case F_SIN:         label = "fr_sin";         break;
+    case F_COS:         label = "fr_cos";         break;
+    case F_TAN:         label = "fr_tan";         break;
+    case F_SINI:        label = "FR_SinI";        break;
+    case F_COSI:        label = "FR_CosI";        break;
+    case F_TANI:        label = "FR_TanI";        break;
+    case F_SIN_DEG:     label = "fr_sin_deg";     break;
+    case F_COS_DEG:     label = "fr_cos_deg";     break;
+    case F_TAN_DEG:     label = "fr_tan_deg";     break;
+    case F_ACOS:        label = "FR_acos";        break;
+    case F_ASIN:        label = "FR_asin";        break;
+    case F_ATAN:        label = "FR_atan";        break;
+    case F_ATAN2:       label = "FR_atan2";       break;
+    case F_LOG2:        label = "FR_log2";        break;
+    case F_LN:          label = "FR_ln";          break;
+    case F_LOG10:       label = "FR_log10";       break;
+    case F_POW2:        label = "FR_pow2";        break;
+    case F_EXP:         label = "FR_EXP";         break;
+    case F_POW10:       label = "FR_POW10";       break;
+    case F_SQRT:        label = "FR_sqrt";        break;
+    case F_HYPOT:       label = "FR_hypot";       break;
+    case F_HYPOT_FAST8: label = "FR_hypot_fast8"; break;
+    default:            break;
+    }
+    return label;
+}
+
+/* Non-zero when `f` is one of the four sine entry points. */
+static int is_sin(Func f) {
+    switch (f) {
+    case F_SIN_BAM: case F_SIN: case F_SINI: case F_SIN_DEG:
+        return 1;
+    default:
+        return 0;
+    }
+}
+/* Non-zero when `f` is one of the four cosine entry points. */
+static int is_cos(Func f) {
+    switch (f) {
+    case F_COS_BAM: case F_COS: case F_COSI: case F_COS_DEG:
+        return 1;
+    default:
+        return 0;
+    }
+}
+/* Non-zero for every selector up to and including F_TAN_DEG; relies on the declaration order of Func. */
+static int is_trig(Func f) {
+    return !(f > F_TAN_DEG);
+}
+
+/*
+ * Evaluate one sample of `f` and produce the matching double-precision reference.
+ *
+ * Parameters:
+ *   f          function selector
+ *   val        sample position: degrees for the trig functions and FR_atan2,
+ *              the plain input value for everything else
+ *   radix      fractional bits used to quantize the input
+ *   out_radix  fractional bits requested from the inverse-trig and log functions
+ *   y_val      fixed y operand, used only by FR_hypot / FR_hypot_fast8
+ *   input_fp   [out] fixed-point input (for FR_atan2 this is the angle in
+ *              degrees at `radix`; the x/y operands are derived internally)
+ *   expected   [out] libm reference rounded to the output precision
+ *   out_prec   [out] fractional bits of the returned raw value
+ *
+ * Returns the raw s32 produced by the library function.
+ *
+ * The reference is always computed from the quantized input, i.e. the value
+ * the library actually receives, not from `val`. The reported error therefore
+ * measures the function itself rather than the input rounding.
+ */
+static s32 eval(Func f, double val, int radix, int out_radix,
+                double y_val, s32 *input_fp, double *expected, int *out_prec)
+{
+    s32 raw = 0;
+
+    /* --- Trig functions (val = degrees) --- */
+    if (is_trig(f)) {
+        const double rad = val * M_PI / 180.0;
+        /* Nearest BAM step, kept unwrapped so the sign of the angle survives.
+         * floor(x + 0.5) is required: (int)(x + 0.5) truncates toward zero and
+         * lands one step too high for every negative angle (-90 deg -> 49153). */
+        const double bam_steps = floor(val * 65536.0 / 360.0 + 0.5);
+        double actual_rad = rad;   /* angle the library really evaluates, in radians */
+        *out_prec = 16;
+
+        switch (f) {
+        case F_SIN_BAM: {
+            u16 bam = (u16)((int)bam_steps & 0xFFFF);
+            *input_fp = (s32)bam;
+            actual_rad = bam_steps * 2.0 * M_PI / 65536.0;
+            raw = fr_sin_bam(bam);
+            break;
+        }
+        case F_COS_BAM: {
+            u16 bam = (u16)((int)bam_steps & 0xFFFF);
+            *input_fp = (s32)bam;
+            actual_rad = bam_steps * 2.0 * M_PI / 65536.0;
+            raw = fr_cos_bam(bam);
+            break;
+        }
+        case F_TAN_BAM: {
+            u16 bam = (u16)((int)bam_steps & 0xFFFF);
+            *input_fp = (s32)bam;
+            actual_rad = bam_steps * 2.0 * M_PI / 65536.0;
+            raw = fr_tan_bam(bam);
+            break;
+        }
+        case F_SIN: {
+            s32 rad_fp = tofix(rad, radix);
+            *input_fp = rad_fp;
+            actual_rad = frd(rad_fp, radix);
+            raw = fr_sin(rad_fp, (u16)radix);
+            break;
+        }
+        case F_COS: {
+            s32 rad_fp = tofix(rad, radix);
+            *input_fp = rad_fp;
+            actual_rad = frd(rad_fp, radix);
+            raw = fr_cos(rad_fp, (u16)radix);
+            break;
+        }
+        case F_TAN: {
+            s32 rad_fp = tofix(rad, radix);
+            *input_fp = rad_fp;
+            actual_rad = frd(rad_fp, radix);
+            raw = fr_tan(rad_fp, (u16)radix);
+            break;
+        }
+        case F_SINI: {
+            const int deg_i = (int)val;   /* integer-degree entry point: fraction is dropped */
+            *input_fp = (s32)deg_i;
+            actual_rad = deg_i * M_PI / 180.0;
+            raw = FR_SinI(deg_i);
+            break;
+        }
+        case F_COSI: {
+            const int deg_i = (int)val;
+            *input_fp = (s32)deg_i;
+            actual_rad = deg_i * M_PI / 180.0;
+            raw = FR_CosI(deg_i);
+            break;
+        }
+        case F_TANI: {
+            const int deg_i = (int)val;
+            *input_fp = (s32)deg_i;
+            actual_rad = deg_i * M_PI / 180.0;
+            raw = FR_TanI((s16)deg_i);
+            break;
+        }
+        case F_SIN_DEG: {
+            s32 deg_fp = tofix(val, radix);
+            *input_fp = deg_fp;
+            actual_rad = frd(deg_fp, radix) * M_PI / 180.0;
+            raw = fr_sin_deg(deg_fp, (u16)radix);
+            break;
+        }
+        case F_COS_DEG: {
+            s32 deg_fp = tofix(val, radix);
+            *input_fp = deg_fp;
+            actual_rad = frd(deg_fp, radix) * M_PI / 180.0;
+            raw = fr_cos_deg(deg_fp, (u16)radix);
+            break;
+        }
+        case F_TAN_DEG: {
+            s32 deg_fp = tofix(val, radix);
+            *input_fp = deg_fp;
+            actual_rad = frd(deg_fp, radix) * M_PI / 180.0;
+            raw = fr_tan_deg(deg_fp, (u16)radix);
+            break;
+        }
+        default:
+            break;
+        }
+
+        /* Reference at the angle that was really evaluated. */
+        if (is_sin(f)) *expected = qN(sin(actual_rad), 16);
+        else if (is_cos(f)) *expected = qN(cos(actual_rad), 16);
+        else *expected = qN(tan_ref(actual_rad), 16);
+        return raw;
+    }
+
+    /* --- Inverse trig (val = input value, not degrees) --- */
+    if (f == F_ACOS || f == F_ASIN || f == F_ATAN) {
+        *out_prec = out_radix;
+        s32 inp = tofix(val, radix);
+        const double actual = frd(inp, radix);
+        *input_fp = inp;
+
+        switch (f) {
+        case F_ACOS:
+            raw = FR_acos(inp, (u16)radix, (u16)out_radix);
+            *expected = qN(acos(actual), out_radix);
+            break;
+        case F_ASIN:
+            raw = FR_asin(inp, (u16)radix, (u16)out_radix);
+            *expected = qN(asin(actual), out_radix);
+            break;
+        case F_ATAN:
+            raw = FR_atan(inp, (u16)radix, (u16)out_radix);
+            *expected = qN(atan(actual), out_radix);
+            break;
+        default:
+            break;
+        }
+        return raw;
+    }
+
+    /* --- FR_atan2 (val = degrees on unit circle) --- */
+    if (f == F_ATAN2) {
+        *out_prec = out_radix;
+        double rad = val * M_PI / 180.0;
+        s32 x = tofix(cos(rad), 15);
+        s32 y = tofix(sin(rad), 15);
+        *input_fp = tofix(val, radix);
+        raw = FR_atan2(y, x, (u16)out_radix);
+        /* Reference uses the quantized operands, same as the library call. */
+        double ref = atan2((double)y, (double)x);
+        *expected = qN(ref, out_radix);
+        return raw;
+    }
+
+    /* --- Log functions (val = input value) --- */
+    if (f == F_LOG2 || f == F_LN || f == F_LOG10) {
+        *out_prec = out_radix;
+        s32 inp = tofix(val, radix);
+        const double actual = frd(inp, radix);
+        *input_fp = inp;
+
+        /* Non-positive inputs have no real logarithm; the reference is reported as 0. */
+        switch (f) {
+        case F_LOG2:
+            raw = FR_log2(inp, (u16)radix, (u16)out_radix);
+            *expected = (actual > 0.0) ? qN(log2(actual), out_radix) : 0.0;
+            break;
+        case F_LN:
+            raw = FR_ln(inp, (u16)radix, (u16)out_radix);
+            *expected = (actual > 0.0) ? qN(log(actual), out_radix) : 0.0;
+            break;
+        case F_LOG10:
+            raw = FR_log10(inp, (u16)radix, (u16)out_radix);
+            *expected = (actual > 0.0) ? qN(log10(actual), out_radix) : 0.0;
+            break;
+        default:
+            break;
+        }
+        return raw;
+    }
+
+    /* --- Power/exp functions (val = exponent) --- */
+    if (f == F_POW2 || f == F_EXP || f == F_POW10) {
+        *out_prec = radix;
+        s32 inp = tofix(val, radix);
+        const double actual = frd(inp, radix);
+        *input_fp = inp;
+
+        switch (f) {
+        case F_POW2:
+            raw = FR_pow2(inp, (u16)radix);
+            *expected = qN(pow(2.0, actual), radix);
+            break;
+        case F_EXP:
+            raw = FR_EXP(inp, (u16)radix);
+            *expected = qN(exp(actual), radix);
+            break;
+        case F_POW10:
+            raw = FR_POW10(inp, (u16)radix);
+            *expected = qN(pow(10.0, actual), radix);
+            break;
+        default:
+            break;
+        }
+        return raw;
+    }
+
+    /* --- FR_sqrt (val = input value) --- */
+    if (f == F_SQRT) {
+        *out_prec = radix;
+        s32 inp = tofix(val, radix);
+        const double actual = frd(inp, radix);
+        *input_fp = inp;
+        raw = FR_sqrt(inp, (u16)radix);
+        *expected = (actual >= 0.0) ? qN(sqrt(actual), radix) : 0.0;
+        return raw;
+    }
+
+    /* --- FR_hypot / FR_hypot_fast8 (val = x, y_val = y) --- */
+    if (f == F_HYPOT || f == F_HYPOT_FAST8) {
+        *out_prec = radix;
+        s32 x_fp = tofix(val, radix);
+        s32 y_fp = tofix(y_val, radix);
+        *input_fp = x_fp;
+
+        if (f == F_HYPOT)
+            raw = FR_hypot(x_fp, y_fp, (u16)radix);
+        else
+            raw = FR_hypot_fast8(x_fp, y_fp);
+
+        *expected = qN(hypot(frd(x_fp, radix), frd(y_fp, radix)), radix);
+        return raw;
+    }
+
+    /* fallback */
+    *input_fp = 0;
+    *expected = 0.0;
+    *out_prec = 16;
+    return 0;
+}
+
+/*
+ * Default sweep increment when --inc is not given:
+ *   trig functions and FR_atan2 : 360/65536 (~0.0055 degrees)
+ *   FR_acos / FR_asin           : 1/32768   (~3.05e-5, one r15 LSB)
+ *   everything else             : 1/65536   (~1.53e-5, one r16 LSB)
+ */
+static double default_inc(Func f) {
+    const bool angular = is_trig(f) || f == F_ATAN2;
+    const bool unit_domain = (f == F_ACOS) || (f == F_ASIN);
+
+    if (angular)
+        return 360.0 / 65536.0;
+    return unit_domain ? 1.0 / 32768.0 : 1.0 / 65536.0;
+}
+
+/*
+ * Print the command-line help text to stderr.
+ * The synopsis and option lines name their arguments explicitly
+ * (<func> <center> <half>, --inc <val>, --radix <n>, ...).
+ */
+static void usage(void) {
+    fprintf(stderr,
+        "Usage: trig_neighborhood <func> <center> <half> [options]\n"
+        "\n"
+        "Supported functions:\n"
+        "\n"
+        " Trig (input: degrees):\n"
+        " fr_sin_bam, fr_cos_bam, fr_tan_bam\n"
+        " fr_sin, fr_cos, fr_tan\n"
+        " FR_SinI, FR_CosI, FR_TanI\n"
+        " fr_sin_deg, fr_cos_deg, fr_tan_deg\n"
+        "\n"
+        " Inverse trig (input: value):\n"
+        " FR_acos, FR_asin, FR_atan\n"
+        "\n"
+        " Inverse trig (input: degrees on unit circle):\n"
+        " FR_atan2\n"
+        "\n"
+        " Logarithmic (input: value):\n"
+        " FR_log2, FR_ln, FR_log10\n"
+        "\n"
+        " Exponential (input: exponent):\n"
+        " FR_pow2, FR_EXP, FR_POW10\n"
+        "\n"
+        " Other:\n"
+        " FR_sqrt (input: value)\n"
+        " FR_hypot, FR_hypot_fast8 (input: x, --y for y)\n"
+        "\n"
+        " center: center of sweep (degrees for trig/atan2, value otherwise)\n"
+        " half: number of samples each side of center\n"
+        "\n"
+        "Options:\n"
+        " --inc <val> increment (default depends on function)\n"
+        " --fmt md|csv|ascii output format (default: md)\n"
+        " --radix <n> input radix for fixed-point (default: 16)\n"
+        " --out_radix <n> output radix for inv trig/log (default: 16)\n"
+        " --y <val> fixed y value for hypot functions (default: 0.0)\n"
+        "\n"
+        "Examples:\n"
+        " trig_neighborhood fr_cos -90 15\n"
+        " trig_neighborhood fr_sin -360 10 --fmt csv\n"
+        " trig_neighborhood fr_tan 89.5 20 --inc 0.01\n"
+        " trig_neighborhood fr_sin_deg 45 10 --radix 8\n"
+        " trig_neighborhood FR_asin 0.5 15 --radix 15 --out_radix 16\n"
+        " trig_neighborhood FR_log2 1.0 15 --inc 0.01\n"
+        " trig_neighborhood FR_atan2 90 15\n"
+        " trig_neighborhood FR_hypot_fast8 100 15 --y 50 --radix 8\n"
+    );
+}
+
+int main(int argc, char **argv) { /* CLI entry: sweep one function around a center value and print one got-vs-expected row per sample. Returns 0 on success, 1 on bad arguments. */
+ if (argc < 4) { usage(); return 1; } /* three positionals are required: function name, center, half */
+
+ Func func = parse_func(argv[1]);
+ if (func == F_UNKNOWN) {
+ fprintf(stderr, "Unknown function: %s\n", argv[1]);
+ usage();
+ return 1;
+ }
+
+ double center = atof(argv[2]);
+ int half = atoi(argv[3]); /* samples on each side of center; the sweep prints 2*half + 1 rows */
+ double inc = -1.0; /* sentinel: a negative value means "use default_inc(func)" */
+ Fmt fmt = FMT_MD;
+ int radix = 16;
+ int out_radix = 16;
+ double y_val = 0.0;
+
+ for (int i = 4; i < argc; i++) { /* optional flags; unrecognized arguments and flags missing their value are skipped without an error */
+ if (!strcmp(argv[i], "--inc") && i + 1 < argc)
+ inc = atof(argv[++i]);
+ else if (!strcmp(argv[i], "--fmt") && i + 1 < argc) {
+ i++;
+ if (!strcmp(argv[i], "csv")) fmt = FMT_CSV;
+ else if (!strcmp(argv[i], "ascii")) fmt = FMT_ASCII;
+ else fmt = FMT_MD; /* any other --fmt value selects markdown */
+ }
+ else if (!strcmp(argv[i], "--radix") && i + 1 < argc)
+ radix = atoi(argv[++i]);
+ else if (!strcmp(argv[i], "--out_radix") && i + 1 < argc)
+ out_radix = atoi(argv[++i]);
+ else if (!strcmp(argv[i], "--y") && i + 1 < argc)
+ y_val = atof(argv[++i]);
+ }
+
+ if (inc < 0.0) inc = default_inc(func); /* note: an explicitly negative --inc is also replaced by the default */
+
+ const char *cols[] = {"sample", "val", "input_fp", "radix", "raw_got", "raw_exp", "expected", "got", "abs_err", "pct_err"};
+ int ncols = 10;
+
+ switch (fmt) { /* emit the header for the chosen output format */
+ case FMT_CSV:
+ for (int c = 0; c < ncols; c++)
+ printf("%s%s", cols[c], c < ncols - 1 ? "," : "\n");
+ break;
+ case FMT_MD:
+ printf("**%s** center=%.6f, +/-%d samples, inc=%.6g, radix=%d",
+ func_name(func), center, half, inc, radix);
+ if (out_radix != radix)
+ printf(", out_radix=%d", out_radix);
+ if (func == F_HYPOT || func == F_HYPOT_FAST8)
+ printf(", y=%.6f", y_val);
+ printf("\n\n");
+ printf("|");
+ for (int c = 0; c < ncols; c++) printf(" %s |", cols[c]);
+ printf("\n|");
+ for (int c = 0; c < ncols; c++) printf("---|");
+ printf("\n");
+ break;
+ case FMT_ASCII:
+ printf("# %s center=%.6f +/-%d inc=%.6g radix=%d",
+ func_name(func), center, half, inc, radix);
+ if (out_radix != radix)
+ printf(" out_radix=%d", out_radix);
+ if (func == F_HYPOT || func == F_HYPOT_FAST8)
+ printf(" y=%.6f", y_val);
+ printf("\n");
+ printf("%8s %12s %12s %6s %10s %10s %12s %12s %12s %12s\n",
+ cols[0], cols[1], cols[2], cols[3], cols[4], cols[5], cols[6], cols[7], cols[8], cols[9]);
+ printf("%8s %12s %12s %6s %10s %10s %12s %12s %12s %12s\n",
+ "--------", "------------", "------------", "------",
+ "----------", "----------",
+ "------------", "------------", "------------", "------------");
+ break;
+ }
+
+ for (int k = -half; k <= half; k++) { /* one row per sample, k = -half .. +half */
+ double val = center + k * inc;
+ s32 input_fp;
+ double expected;
+ int out_prec;
+ s32 raw = eval(func, val, radix, out_radix, y_val, &input_fp, &expected, &out_prec); /* eval also fills input_fp, expected and out_prec */
+ s32 raw_exp = (s32)floor(ldexp(expected, out_prec) + 0.5); /* reference scaled by 2^out_prec, rounded half up */
+ double got = frd(raw, out_prec); /* NOTE(review): frd is defined elsewhere; presumably converts the fixed-point result to double - confirm */
+ double ae = fabs(got - expected);
+ double pe = (expected != 0.0) ? ae / fabs(expected) * 100.0 : (ae != 0.0 ? 100.0 : 0.0); /* percent error; when expected is 0, report 100 if there is any error, else 0 */
+
+ switch (fmt) { /* the "radix" column is the input radix, not out_prec */
+ case FMT_CSV:
+ printf("%d,%.6g,%d,%d,%d,%d,%.6f,%.6f,%.6f,%.4f%%\n",
+ k, val, input_fp, radix, raw, raw_exp, expected, got, ae, pe);
+ break;
+ case FMT_MD:
+ printf("| %d | %.6g | %d | %d | %d | %d | %.6f | %.6f | %.6f | %.4f%% |\n",
+ k, val, input_fp, radix, raw, raw_exp, expected, got, ae, pe);
+ break;
+ case FMT_ASCII:
+ printf("%8d %12.6g %12d %6d %10d %10d %12.6f %12.6f %12.6f %11.4f%%\n",
+ k, val, input_fp, radix, raw, raw_exp, expected, got, ae, pe);
+ break;
+ }
+ }
+
+ return 0;
+}
From 44061aafdf5e75cb9b57961e354a59bc516ba266 Mon Sep 17 00:00:00 2001
From: deftio
Date: Mon, 4 May 2026 09:16:52 -0700
Subject: [PATCH 4/7] v2.0.7 docs and prec updates
---
.github/workflows/release.yml | 24 +
README.md | 160 +--
VERSION | 2 +-
agents.md | 3 +-
compare_lfm/comparison_results.json | 92 +-
compare_lfm/comparison_summary.md | 32 +-
dev/misc/FR_math.c.checkpoint3 | 1705 +++++++++++++++++++++++++++
docs/README.md | 54 +-
docs/building.md | 44 +-
docs/examples.md | 2 +-
docs/getting-started.md | 4 +-
idf_component.yml | 2 +-
library.json | 2 +-
library.properties | 2 +-
llms.txt | 5 +-
makefile | 43 +-
pages/assets/site.js | 2 +-
pages/guide/building.html | 42 +-
pages/guide/examples.html | 2 +-
pages/guide/getting-started.html | 4 +-
pages/index.html | 67 +-
scripts/crossbuild-docker.sh | 123 --
scripts/crossbuild_sizes.sh | 290 +++++
scripts/size_report.sh | 142 ---
scripts/update_sizes.sh | 158 ---
src/FR_math.c | 394 +++++--
src/FR_math.h | 32 +-
src/FR_math_2D.cpp | 2 +-
src/FR_math_2D.h | 2 +-
src/FR_tan32.c | 282 -----
src/FR_tan_table.h | 115 --
src/FR_trig_table.h | 106 --
tests/test_full_sweep.c | 346 ------
tests/test_pole_table.c | 92 --
tests/test_sweep_csv.c | 149 ---
tests/test_tan32.c | 424 -------
tests/test_tan32_peaks.c | 198 ----
tests/test_tan32_sweep.c | 318 -----
tests/test_tdd.cpp | 2 +-
tools/trig_neighborhood.cpp | 17 +
40 files changed, 2633 insertions(+), 2852 deletions(-)
create mode 100644 dev/misc/FR_math.c.checkpoint3
delete mode 100755 scripts/crossbuild-docker.sh
create mode 100755 scripts/crossbuild_sizes.sh
delete mode 100755 scripts/size_report.sh
delete mode 100755 scripts/update_sizes.sh
delete mode 100644 src/FR_tan32.c
delete mode 100644 src/FR_tan_table.h
delete mode 100644 src/FR_trig_table.h
delete mode 100644 tests/test_full_sweep.c
delete mode 100644 tests/test_pole_table.c
delete mode 100644 tests/test_sweep_csv.c
delete mode 100644 tests/test_tan32.c
delete mode 100644 tests/test_tan32_peaks.c
delete mode 100644 tests/test_tan32_sweep.c
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index c5765cd..bea9935 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -99,3 +99,27 @@ jobs:
body_path: /tmp/release_notes.md
draft: false
prerelease: false
+
+ publish-pio:
+ needs: release
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - name: Install PlatformIO
+ run: pip install platformio
+ - name: Publish to PlatformIO Registry
+ env:
+ PLATFORMIO_AUTH_TOKEN: ${{ secrets.PLATFORMIO_AUTH_TOKEN }}
+ run: pio pkg publish . --no-interactive
+
+ publish-espressif:
+ needs: release
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - name: Install compote (ESP Component Manager)
+ run: pip install idf-component-manager
+ - name: Publish to Espressif Component Registry
+ env:
+ IDF_COMPONENT_API_TOKEN: ${{ secrets.IDF_COMPONENT_API_TOKEN }}
+ run: compote component upload --name fr_math --namespace deftio
diff --git a/README.md b/README.md
index 784535c..07c7ce5 100644
--- a/README.md
+++ b/README.md
@@ -2,26 +2,23 @@
[](https://github.com/deftio/fr_math/actions/workflows/ci.yml)
[](#building-and-testing)
[](https://deftio.github.io/fr_math/)
-[](release_notes.md)
-
+[](release_notes.md)
+
[](https://registry.platformio.org/libraries/deftio/fr_math)
[](https://github.com/deftio/fr_math)
[](https://components.espressif.com/components/deftio/fr_math)
-
# FR_Math: A C Language Fixed-Point Math Library for Embedded Systems
-FR_Math is a compact, integer-only fixed-point math library built for
-systems where floating point is too slow, too big, or unavailable. Designed for embedded targets ranging from
-legacy 16 MHz 68k processors to modern Cortex-M and RISC-V cores, it
-provides a full suite of math primitives — trigonometry, logarithms,
-roots, transforms, and signal generators — while remaining
-deterministic, portable, and small. Unlike traditional fixed-point
-libraries, FR_Math lets the caller choose the binary point per
-operation, trading precision and range explicitly instead of locking
-into a single format. Pure C (C99/C11/C17) with an optional C++
-2D-transform wrapper. Compiles under Arduino. Zero dependencies
-beyond ``.
+See the **[Documentation & Guide](https://deftio.github.io/fr_math/)** for the API reference, examples, fixed-point primer, and build instructions.
+
+
+**FR_Math** is a compact, integer-only fixed-point math library built for systems where floating point is too slow, too big, or unavailable. Designed for embedded targets ranging from legacy 16 MHz 68k processors to modern Cortex-M and RISC-V cores, it provides a full suite of math primitives — trigonometry, logarithms, roots, transforms, and signal generators — while remaining deterministic, portable, and small. Optional print utility functions are also provided for pretty-printing fixed-point numbers over serial links or into buffers.
+
+Unlike most fixed-point libraries, FR_Math lets the caller choose the binary point (radix) per operation, trading precision and range explicitly instead of locking into a single format. FR_Math is pure C (C99/C11/C17) with C++ wrappers.
+Compiles under Arduino, PlatformIO, Espressif, and many older embedded targets.
+Zero dependencies beyond `<stdint.h>`.
+
### Measured accuracy
@@ -29,49 +26,46 @@ Errors below are measured at Q16.16 (s15.16). All functions accept any
radix — Q16.16 is just the reference point for the table.
At other radixes (3-bit, 24-bit, etc.) accuracy will differ due to the
-number of fractional bits available. All functions support radix 0 to 30.
-
-
-| Function | Max err (%)*| Avg err (%) | Peak at | Note |
-|---|---:|---:|---:|---|
-| sin/cos (BAM) | 0.4578 | 0.0076 | 94 | fr_sin_bam/fr_cos_bam direct; 129-entry table |
-| sin/cos (deg) | 0.4578 | 0.0076 | -359.5 | FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM |
-| sin/cos (rad) | 0.6104 | 0.0085 | -4.721 | fr_sin/fr_cos via fr_rad_to_bam ±2π r16 |
-| tan (BAM) | 0.5823 | 0.0008 | 16360 | fr_tan_bam 65536-pt full; ±maxint at poles |
-| tan (deg) | 0.5311 | 0.0008 | -270.1 | FR_Tan ±360° s15.16 full; sat at poles |
-| tan (rad) | 13.4069 | 0.0029 | -4.713 | fr_tan ±2π r16 full; sat at poles |
-| asin / acos | 0.8743 | 0.0301 | 0.0123 | 65536-pt; sqrt approx near boundary |
-| atan2 | 0.5100 | 0.0237 | -2.571 | 65536x5 radii; asin/acos+hypot_fast8 |
-| atan | 0.3390 | 0.0154 | -0.018 | 20001-pt full sweep [-10,10]; via FR_atan2 |
-| sqrt | 0.0239 | 0.0000 | 0.0001 | Round-to-nearest |
-| log2 | 0.0286 | 0.0029 | 0.895 | 65-entry mantissa table |
-| pow2 | 0.0019 | 0.0003 | 3.36 | 65-entry fraction table |
-| ln, log10 | 0.0004 | 0.0000 | 50 | Via FR_MULK28 from log2 |
-| exp | 0.0003 | 0.0000 | 3.91 | FR_MULK28 + FR_pow2 |
-| exp_fast | 0.0009 | 0.0001 | 3.92 | Shift-only scaling |
-| pow10 | 0.0007 | 0.0000 | 1.97 | FR_MULK28 + FR_pow2 |
-| pow10_fast | 0.0028 | 0.0002 | 1.99 | Shift-only scaling |
-| hypot (exact) | 0.0000 | 0.0000 | 0 | 64-bit intermediate |
-| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | 1000 | Shift-only, no multiply |
-
-*Relative error; reference clamped to 1% of full-scale output. "Peak at" = input that produced max error.
-
-
+number of fractional bits available.
+
+| Function | Max err (%)* | Avg err (%) | Note |
+| --- | --- | --- | --- |
+| sin/cos (BAM) | 0.1526 | 0.0030 | fr_sin_bam/fr_cos_bam direct; 129-entry table |
+| sin/cos (deg) | 0.1526 | 0.0029 | FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM |
+| sin/cos (rad) | 0.1828 | 0.0033 | fr_sin/fr_cos via fr_rad_to_bam ±2π r16 |
+| tan (BAM) | 0.5823 | 0.0008 | fr_tan_bam 65536-pt full; ±maxint at poles |
+| tan (deg) | 0.5311 | 0.0008 | fr_tan_deg ±360° s15.16 full; sat at poles |
+| tan (rad) | 0.0386 | 0.0001 | fr_tan ±2π r16; r24 pole bypass |
+| asin / acos | 0.7771 | 0.0280 | 65536-pt; sqrt approx near boundary |
+| atan2 | 0.2564 | 0.0237 | 65536x5 radii; asin/acos+hypot_fast8 |
+| atan | 0.2425 | 0.0155 | 20001-pt full sweep [-10,10]; via FR_atan2 |
+| sqrt | 0.0000 | 0.0000 | Round-to-nearest |
+| log2 | 0.0116 | 0.0016 | 65-entry mantissa table |
+| pow2 | 0.0018 | 0.0004 | 65-entry fraction table |
+| ln, log10 | 0.0004 | 0.0000 | Via FR_MULK28 from log2 |
+| exp | 0.0003 | 0.0000 | FR_MULK28 + FR_pow2 |
+| exp_fast | 0.0009 | 0.0001 | Shift-only scaling |
+| pow10 | 0.0005 | 0.0000 | FR_MULK28 + FR_pow2 |
+| pow10_fast | 0.0022 | 0.0002 | Shift-only scaling |
+| hypot (exact) | 0.0000 | 0.0000 | 64-bit intermediate |
+| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply |
+
+
### What's in the box
| Area | Functions |
-|---|---|
-| Arithmetic | `FR_ADD`, `FR_SUB`, `FR_DIV`, `FR_DIV32`, `FR_MOD`, `FR_FixMuls`, `FR_FixMulSat`, `FR_CHRDX` |
-| Utility | `FR_MIN`, `FR_MAX`, `FR_CLAMP`, `FR_ABS`, `FR_SGN` |
-| Trig (degree) | `fr_sin_deg`, `fr_cos_deg`, `fr_tan_deg`, `FR_SinI`, `FR_CosI`, `FR_TanI` |
-| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam` |
-| Inverse trig | `FR_atan`, `FR_atan2`, `FR_asin`, `FR_acos` |
-| Log / exp | `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`, `FR_EXP_FAST`, `FR_POW10_FAST`, `FR_MULK28` |
-| Roots | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` |
-| Wave generators | `fr_wave_sqr`, `fr_wave_pwm`, `fr_wave_tri`, `fr_wave_saw`, `fr_wave_tri_morph`, `fr_wave_noise` |
-| Envelope | `fr_adsr_init`, `fr_adsr_trigger`, `fr_adsr_release`, `fr_adsr_step` |
-| 2D transforms | `FR_Matrix2D_CPT` (mul, add, sub, det, inv, setrotate, XFormPtI, XFormPtI16) |
-| Formatted output | `FR_printNumD`, `FR_printNumF`, `FR_printNumH`, `FR_numstr` |
+| --- | --- |
+| Arithmetic | FR_ADD, FR_SUB, FR_DIV, FR_DIV32, FR_MOD, FR_FixMuls, FR_FixMulSat, FR_CHRDX |
+| Utility | FR_MIN, FR_MAX, FR_CLAMP, FR_ABS, FR_SGN |
+| Trig (radian/BAM) | fr_sin, fr_cos, fr_tan, fr_sin_bam, fr_cos_bam, fr_tan_bam |
+| Trig (degree) | fr_sin_deg, fr_cos_deg, fr_tan_deg, FR_SinI, FR_CosI, FR_TanI |
+| Inverse trig | FR_atan, FR_atan2, FR_asin, FR_acos |
+| Log / exp | FR_log2, FR_ln, FR_log10, FR_pow2, FR_EXP, FR_POW10, FR_EXP_FAST, FR_POW10_FAST, FR_MULK28 |
+| Roots | FR_sqrt, FR_hypot, FR_hypot_fast8 |
+| Wave generators | fr_wave_sqr, fr_wave_pwm, fr_wave_tri, fr_wave_saw, fr_wave_tri_morph, fr_wave_noise |
+| Envelope | fr_adsr_init, fr_adsr_trigger, fr_adsr_release, fr_adsr_step |
+| 2D transforms | FR_Matrix2D_CPT (mul, add, sub, det, inv, setrotate, XFormPtI, XFormPtI16) |
+| Formatted output | FR_printNumD, FR_printNumF, FR_printNumH, FR_numstr |
### Library size (FR_math.c only, `-Os`)
@@ -80,24 +74,28 @@ sizes may vary depending on optimization and linker settings. Sizes
include all code and internal tables; everything is ROMable.
-| Target | Core | Full |
-|--------|-----:|-----:|
-| RP2040 (Cortex-M0+) | 2.6 KB | 4.2 KB |
-| STM32 (Cortex-M4) | 2.6 KB | 4.2 KB |
-| RISC-V 32 (rv32imac) | 3.0 KB | 4.7 KB |
-| ESP32 (Xtensa) | 3.5 KB | 5.2 KB |
-| 68k | 3.5 KB | 5.3 KB |
-| x86-64 (GCC) | 3.5 KB | 5.7 KB |
-| x86-32 | 4.5 KB | 6.8 KB |
-| MSP430 (16-bit) | 5.9 KB | 8.9 KB |
-| 68HC11 | 10.8 KB | 16.0 KB |
-| AVR (ATmega328P) | 7.0 KB | 10.6 KB |
+| Target | Lean | Core | Full |
+| --- | ---:| ---:| ---:|
+| Cortex-M4 (STM32) | 3.3 KB | 4.4 KB | 5.5 KB |
+| Cortex-M0 (RP2040) | 3.4 KB | 4.5 KB | 5.7 KB |
+| RISC-V rv64 | 4.0 KB | 5.5 KB | 6.8 KB |
+| RISC-V rv32 | 4.1 KB | 5.5 KB | 6.8 KB |
+| Xtensa LX106 (ESP8266) | 4.2 KB | 5.8 KB | 7.3 KB |
+| 68k | 4.4 KB | 6.2 KB | 7.8 KB |
+| x86-64 (GCC) | 4.6 KB | 6.1 KB | 8.0 KB |
+| AArch64 (ARM64) | 4.8 KB | 6.6 KB | 8.7 KB |
+| x86-32 | 5.3 KB | 7.2 KB | 9.2 KB |
+| MSP430 (16-bit) | 7.8 KB | 10.7 KB | 12.8 KB |
+| AVR (ATmega328P) | 9.2 KB | 12.8 KB | 15.4 KB |
+| 68HC11 | 13.3 KB | 18.4 KB | 22.6 KB |
-Core = compiled with `-DFR_CORE_ONLY` (math only, no print, no waves).
+Lean = `-DFR_LEAN -DFR_NO_PRINT` (radian trig, inv trig, log/exp, sqrt).
+Core = `-DFR_CORE_ONLY` (+ degree trig, BAM tan, log10, hypot).
+Full = all features (+ print, waves, ADSR).
The optional 2D module adds ~1 KB.
\* MSP430, 68HC11, and AVR are 8/16-bit — every 32-bit operation expands to multiple instructions.
-See [`docker/`](docker/) for the cross-compile setup.
+See [Building & Testing](docs/building.md) for the full cross-compile setup.
### Lean build options
@@ -106,10 +104,10 @@ for ROM-constrained targets. Define them before including `FR_math.h`
(or pass `-D` on the compiler command line):
| Define | What it removes | Typical savings |
-|---|---|---|
-| `FR_CORE_ONLY` | Everything below (print + waves) | ~1.9 KB |
-| `FR_NO_PRINT` | `FR_printNumF`, `FR_printNumD`, `FR_printNumH`, `FR_numstr` | ~1.3 KB |
-| `FR_NO_WAVES` | `fr_wave_*` (6 shapes), `fr_adsr_*` (ADSR envelope), `FR_HZ2BAM_INC` | ~0.6 KB |
+| --- | --- | --- |
+| FR_CORE_ONLY | Everything below (print + waves) | ~1.9 KB |
+| FR_NO_PRINT | FR_printNumF, FR_printNumD, FR_printNumH, FR_numstr | ~1.3 KB |
+| FR_NO_WAVES | fr_wave_* (6 shapes), fr_adsr_* (ADSR envelope), FR_HZ2BAM_INC | ~0.6 KB |
`FR_CORE_ONLY` is a convenience shorthand that defines both
`FR_NO_PRINT` and `FR_NO_WAVES` in one step.
@@ -134,7 +132,7 @@ make lib # build static library
make test # run all tests (unit, TDD characterization, 2D)
```
-## Quick taste
+## Example
```c
#include "FR_math.h"
@@ -216,23 +214,25 @@ The full docs ship in two forms — pick whichever fits how you read.
**Terminal / editor (plain markdown):**
- [docs/README.md](docs/README.md) — same content as plain markdown.
- - [getting-started.md](docs/getting-started.md) | [fixed-point-primer.md](docs/fixed-point-primer.md) | [api-reference.md](docs/api-reference.md)
- - [examples.md](docs/examples.md) | [building.md](docs/building.md) | [releases.md](docs/releases.md)
+- [getting-started.md](docs/getting-started.md) | [fixed-point-primer.md](docs/fixed-point-primer.md) | [api-reference.md](docs/api-reference.md)
+- [examples.md](docs/examples.md) | [building.md](docs/building.md) | [releases.md](docs/releases.md)
## History
FR_Math has been in service since 2000, originally built for graphics
transforms on 16 MHz 68k Palm Pilots. It shipped inside Trumpetsoft's
*Inkstorm* on PalmOS, then moved forward through ARM, x86, MIPS,
-RISC-V, and various 8/16-bit embedded targets. v2.0.7 is the current
-release with a full test suite, bit-exact numerical specification, and
-CI on every push.
-
+RISC-V, and various 8/16-bit embedded targets.
+The current release has a full test suite, a bit-exact numerical specification,
+CI on every push, and improved documentation.
+
## License
BSD-2-Clause — see [LICENSE.txt](LICENSE.txt).
(c) 2000-2026 M. Chatterjee
+PRs and suggestions are welcome. Please be detailed, as embedded systems can involve many tradeoffs.
+
## For AI coding agents
- [llms.txt](llms.txt) — machine-readable API summary
@@ -241,4 +241,4 @@ BSD-2-Clause — see [LICENSE.txt](LICENSE.txt).
## Version
2.0.7 — see [release_notes.md](release_notes.md) for the v1 → v2
-migration guide, numerical fixes, and new functionality.
+migration guide, numerical fixes, and new functionality.
\ No newline at end of file
diff --git a/VERSION b/VERSION
index f1547e6..815e68d 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.0.7
+2.0.8
diff --git a/agents.md b/agents.md
index 2a461f6..806064d 100644
--- a/agents.md
+++ b/agents.md
@@ -14,9 +14,8 @@ Integer-only, zero dependencies, caller-selectable radix (binary point).
```
src/ Core library (this is what ships)
FR_math.h Public API — all macros, function declarations, constants
- FR_math.c All function implementations
+ FR_math.c All function implementations (trig tables inlined)
FR_defs.h Type aliases (s8, s16, s32, u8, u16, u32)
- FR_trig_table.h Precomputed sine table
FR_math_2D.h/.cpp Optional C++ 2D transform class
tests/ Test suite (7 programs, run via `make test`)
diff --git a/compare_lfm/comparison_results.json b/compare_lfm/comparison_results.json
index b9c1b3b..76b25d9 100644
--- a/compare_lfm/comparison_results.json
+++ b/compare_lfm/comparison_results.json
@@ -13,9 +13,9 @@
"double_reference": "std::sin",
"sweep": "65536-pt, [-pi, +pi]",
"speed": {
- "fr_math_ns_per_call": 5.6,
- "libfixmath_ns_per_call": 10.6,
- "fr_math_speedup": 1.91,
+ "fr_math_ns_per_call": 7.4,
+ "libfixmath_ns_per_call": 12.7,
+ "fr_math_speedup": 1.72,
"faster": "fr_math"
},
"accuracy_vs_double": {
@@ -43,9 +43,9 @@
"double_reference": "std::cos",
"sweep": "65536-pt, [-pi, +pi]",
"speed": {
- "fr_math_ns_per_call": 8.9,
- "libfixmath_ns_per_call": 13.3,
- "fr_math_speedup": 1.50,
+ "fr_math_ns_per_call": 9.9,
+ "libfixmath_ns_per_call": 13.2,
+ "fr_math_speedup": 1.34,
"faster": "fr_math"
},
"accuracy_vs_double": {
@@ -73,19 +73,19 @@
"double_reference": "std::tan",
"sweep": "65536-pt, [-1.2, 1.2] rad",
"speed": {
- "fr_math_ns_per_call": 7.1,
- "libfixmath_ns_per_call": 32.6,
- "fr_math_speedup": 4.57,
+ "fr_math_ns_per_call": 14.1,
+ "libfixmath_ns_per_call": 37.2,
+ "fr_math_speedup": 2.64,
"faster": "fr_math"
},
"accuracy_vs_double": {
"fr_math": {
- "max_abs_error": 1.98158306e-04,
- "mean_abs_error": 3.37019908e-05,
- "max_error_lsb": 13.0,
- "mean_error_lsb": 2.2,
- "max_rel_error_pct": 0.1551,
- "mean_rel_error_pct": 0.0055
+ "max_abs_error": 5.06554437e-04,
+ "mean_abs_error": 5.84009618e-05,
+ "max_error_lsb": 33.2,
+ "mean_error_lsb": 3.8,
+ "max_rel_error_pct": 0.5850,
+ "mean_rel_error_pct": 0.0122
},
"libfixmath": {
"max_abs_error": 1.82495961e-02,
@@ -104,9 +104,9 @@
"double_reference": "std::asin",
"sweep": "65536-pt, [-0.999, 0.999]",
"speed": {
- "fr_math_ns_per_call": 9.7,
- "libfixmath_ns_per_call": 49.5,
- "fr_math_speedup": 5.11,
+ "fr_math_ns_per_call": 11.9,
+ "libfixmath_ns_per_call": 64.0,
+ "fr_math_speedup": 5.38,
"faster": "fr_math"
},
"accuracy_vs_double": {
@@ -134,9 +134,9 @@
"double_reference": "std::acos",
"sweep": "65536-pt, [-0.999, 0.999]",
"speed": {
- "fr_math_ns_per_call": 8.4,
- "libfixmath_ns_per_call": 50.7,
- "fr_math_speedup": 6.03,
+ "fr_math_ns_per_call": 11.1,
+ "libfixmath_ns_per_call": 65.0,
+ "fr_math_speedup": 5.88,
"faster": "fr_math"
},
"accuracy_vs_double": {
@@ -164,9 +164,9 @@
"double_reference": "std::atan",
"sweep": "65536-pt, [-50, 50]",
"speed": {
- "fr_math_ns_per_call": 8.1,
- "libfixmath_ns_per_call": 11.0,
- "fr_math_speedup": 1.37,
+ "fr_math_ns_per_call": 10.8,
+ "libfixmath_ns_per_call": 14.8,
+ "fr_math_speedup": 1.36,
"faster": "fr_math"
},
"accuracy_vs_double": {
@@ -194,9 +194,9 @@
"double_reference": "std::atan2",
"sweep": "65536-pt, 5 radii x 360 deg",
"speed": {
- "fr_math_ns_per_call": 15.9,
- "libfixmath_ns_per_call": 10.9,
- "fr_math_speedup": 0.69,
+ "fr_math_ns_per_call": 20.8,
+ "libfixmath_ns_per_call": 13.7,
+ "fr_math_speedup": 0.66,
"faster": "libfixmath"
},
"accuracy_vs_double": {
@@ -225,9 +225,9 @@
"double_reference": "std::sqrt",
"sweep": "65536-pt, [0.01, 100]",
"speed": {
- "fr_math_ns_per_call": 18.6,
- "libfixmath_ns_per_call": 19.9,
- "fr_math_speedup": 1.07,
+ "fr_math_ns_per_call": 24.8,
+ "libfixmath_ns_per_call": 26.2,
+ "fr_math_speedup": 1.05,
"faster": "fr_math"
},
"accuracy_vs_double": {
@@ -255,9 +255,9 @@
"double_reference": "std::exp",
"sweep": "65536-pt, [-5, 5]",
"speed": {
- "fr_math_ns_per_call": 3.0,
- "libfixmath_ns_per_call": 64.7,
- "fr_math_speedup": 21.28,
+ "fr_math_ns_per_call": 4.0,
+ "libfixmath_ns_per_call": 84.6,
+ "fr_math_speedup": 21.04,
"faster": "fr_math"
},
"accuracy_vs_double": {
@@ -285,9 +285,9 @@
"double_reference": "std::log",
"sweep": "65536-pt, [0.01, 100]",
"speed": {
- "fr_math_ns_per_call": 9.0,
- "libfixmath_ns_per_call": 453.2,
- "fr_math_speedup": 50.53,
+ "fr_math_ns_per_call": 11.2,
+ "libfixmath_ns_per_call": 583.3,
+ "fr_math_speedup": 51.87,
"faster": "fr_math"
},
"accuracy_vs_double": {
@@ -315,9 +315,9 @@
"double_reference": "std::log2",
"sweep": "65536-pt, [0.01, 100]",
"speed": {
- "fr_math_ns_per_call": 8.5,
- "libfixmath_ns_per_call": 39.4,
- "fr_math_speedup": 4.63,
+ "fr_math_ns_per_call": 11.3,
+ "libfixmath_ns_per_call": 51.1,
+ "fr_math_speedup": 4.53,
"faster": "fr_math"
},
"accuracy_vs_double": {
@@ -345,8 +345,8 @@
"double_reference": "double a*b",
"sweep": "65536-pt, a in [-50,50], b in [-2,2]",
"speed": {
- "fr_math_ns_per_call": 0.9,
- "libfixmath_ns_per_call": 1.2,
+ "fr_math_ns_per_call": 1.2,
+ "libfixmath_ns_per_call": 1.6,
"fr_math_speedup": 1.33,
"faster": "fr_math"
},
@@ -375,9 +375,9 @@
"double_reference": "double a/b",
"sweep": "65536-pt, a/b in [-50,50]/[0.5,50]",
"speed": {
- "fr_math_ns_per_call": 0.9,
- "libfixmath_ns_per_call": 5.3,
- "fr_math_speedup": 6.10,
+ "fr_math_ns_per_call": 1.2,
+ "libfixmath_ns_per_call": 6.9,
+ "fr_math_speedup": 5.96,
"faster": "fr_math"
},
"accuracy_vs_double": {
@@ -406,7 +406,7 @@
"double_reference": "std::hypot",
"sweep": "65536-pt, 5 radii x 360 deg",
"speed": {
- "fr_math_ns_per_call": 19.9
+ "fr_math_ns_per_call": 26.2
},
"accuracy_vs_double": {
"fr_math": {
@@ -425,7 +425,7 @@
"double_reference": "std::hypot",
"sweep": "65536-pt, 5 radii x 360 deg",
"speed": {
- "fr_math_ns_per_call": 2.6
+ "fr_math_ns_per_call": 3.2
},
"accuracy_vs_double": {
"fr_math": {
diff --git a/compare_lfm/comparison_summary.md b/compare_lfm/comparison_summary.md
index e547ce4..0f467ad 100644
--- a/compare_lfm/comparison_summary.md
+++ b/compare_lfm/comparison_summary.md
@@ -25,7 +25,7 @@ All errors measured vs IEEE 754 double. Pct errors skip |ref| < 0.01.
|----------|----------:|---------:|---------:|----------:|---------:|---------:|--------|
| sin | 4.9 | 0.4816 | 0.0081 | 507.6 | 74.5513 | 0.6105 | FR |
| cos | 4.4 | 0.3282 | 0.0077 | 508.3 | 74.4001 | 0.6121 | FR |
-| tan | 13.0 | 0.1551 | 0.0055 | 1196.0 | 0.7099 | 0.0410 | FR |
+| tan | 33.2 | 0.5850 | 0.0122 | 1196.0 | 0.7099 | 0.0410 | FR |
| asin | 24.9 | 1.9776 | 0.0477 | 667.1 | 20.1233 | 2.4452 | FR |
| acos | 24.6 | 0.2724 | 0.0093 | 667.8 | 15.3142 | 0.3475 | FR |
| atan | 59.9 | 0.2149 | 0.0061 | 666.3 | 19.8632 | 0.4571 | FR |
@@ -43,21 +43,21 @@ All errors measured vs IEEE 754 double. Pct errors skip |ref| < 0.01.
| Function | FR_math | libfixmath | Speedup | Faster |
|----------|--------:|-----------:|--------:|--------|
-| sin | 5.6 | 10.6 | 1.91x | FR |
-| cos | 8.9 | 13.3 | 1.50x | FR |
-| tan | 7.1 | 32.6 | 4.57x | FR |
-| asin | 9.7 | 49.5 | 5.11x | FR |
-| acos | 8.4 | 50.7 | 6.03x | FR |
-| atan | 8.1 | 11.0 | 1.37x | FR |
-| atan2 | 15.9 | 10.9 | 0.69x | lfm |
-| sqrt | 18.6 | 19.9 | 1.07x | FR |
-| exp | 3.0 | 64.7 | 21.28x | FR |
-| ln | 9.0 | 453.2 | 50.53x | FR |
-| log2 | 8.5 | 39.4 | 4.63x | FR |
-| mul | 0.9 | 1.2 | 1.33x | FR |
-| div | 0.9 | 5.3 | 6.10x | FR |
-| hypot | 19.9 | --- | --- | FR only |
-| hypot_fast8 | 2.6 | --- | --- | FR only |
+| sin | 7.4 | 12.7 | 1.72x | FR |
+| cos | 9.9 | 13.2 | 1.34x | FR |
+| tan | 14.1 | 37.2 | 2.64x | FR |
+| asin | 11.9 | 64.0 | 5.38x | FR |
+| acos | 11.1 | 65.0 | 5.88x | FR |
+| atan | 10.8 | 14.8 | 1.36x | FR |
+| atan2 | 20.8 | 13.7 | 0.66x | lfm |
+| sqrt | 24.8 | 26.2 | 1.05x | FR |
+| exp | 4.0 | 84.6 | 21.04x | FR |
+| ln | 11.2 | 583.3 | 51.87x | FR |
+| log2 | 11.3 | 51.1 | 4.53x | FR |
+| mul | 1.2 | 1.6 | 1.33x | FR |
+| div | 1.2 | 6.9 | 5.96x | FR |
+| hypot | 26.2 | --- | --- | FR only |
+| hypot_fast8 | 3.2 | --- | --- | FR only |
### Summary (13 head-to-head functions)
diff --git a/dev/misc/FR_math.c.checkpoint3 b/dev/misc/FR_math.c.checkpoint3
new file mode 100644
index 0000000..45c75c2
--- /dev/null
+++ b/dev/misc/FR_math.c.checkpoint3
@@ -0,0 +1,1705 @@
+/**
+ *
+ * @file FR_math.c - c implementation file for basic fixed
+ * radix math routines
+ *
+ * @copy Copyright (C) <2001-2026>
+ * @author M A Chatterjee
+ *
+ * This file contains integer math settable fixed point radix math routines for
+ * use on systems in which floating point is not desired or unavailable.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, please place an acknowledgment in the product documentation.
+ *
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ *
+ * 3. This notice may not be removed or altered from any source
+ * distribution.
+ *
+ */
+
+#include "FR_math.h"
+#include "FR_trig_table.h"
+
+#ifndef FR_NO_STDINT
+#include <stdint.h>
+#endif
+
+/*=======================================================
+ * Full-precision radian/degree → BAM conversion helpers
+ *
+ * rad_to_bam_full(r) returns a full s32 BAM value where:
+ * upper 16 bits = integer BAM (the u16 table index)
+ * lower 16 bits = sub-BAM fractional part
+ * Input r must already be normalized to radix 16 and reduced to [-pi, pi].
+ *
+ * The shift terms match FR_RAD2BAM (10 terms, ~21-bit accuracy) but are
+ * reordered so intermediate sums stay within s32 for |r| <= pi at r16.
+ */
+static s32 rad_to_bam_full(s32 r)
+{
+ /* 10 terms: 65536/(2*pi) ≈ 10430.37835...
+ * 2^13 + 2^11 + 2^7 + 2^6 - 2 + 0.5 - 0.125 + 2^-8 - 2^-11 - 2^-14
+ * = 10430.378357 (~21-bit accuracy)
+ * Terms reordered: interleave negatives early to keep all intermediate
+ * sums within s32 for |r| <= pi at r16 (max result ≈ 2^31 - 4K). */
+ return (r<<13)-(r<<1)+(r<<11)-(r>>3)+(r<<7)+(r<<6)+(r>>1)+(r>>8)-(r>>11)-(r>>14);
+}
+
+/* deg_to_bam_full(d) — same idea for degrees.
+ * Input d must already be normalized to radix 16 and reduced to [-90, 90).
+ * Returns full s32 BAM (upper 16 = integer BAM, lower 16 = sub-BAM).
+ * 7 terms, ~18-bit accuracy matching FR_DEG2BAM. */
+static s32 deg_to_bam_full(s32 d)
+{
+ return (d<<7)+(d<<6)-(d<<3)-(d<<1)+(d>>5)+(d>>6)-(d>>9);
+}
+
+/* Normalize a fixed-radix value to radix 16. */
+static s32 normalize_to_r16(s32 val, u16 radix)
+{
+ return (radix > 16) ? (val >> (radix - 16))
+ : (radix < 16) ? (val << (16 - radix))
+ : val;
+}
+
+/* Reduce non-negative radian (at r16) to [0, 2*pi]; an input of exactly 2*pi (or 4*pi) yields 2*pi.
+ * Helper used by range_reduce_rad and the near-pi small-angle paths. */
+static s32 reduce_to_2pi(s32 r)
+{
+ const s32 two_pi = FR_TWO_PI(16); /* 411775 */
+ if (r > (two_pi << 1))
+ r -= (r / two_pi) * two_pi;
+ else if (r > two_pi)
+ r -= two_pi;
+ return r;
+}
+
+/* Range-reduce radian value (at r16, non-negative) to [-pi, pi].
+ * Caller guarantees r >= 0 (sign is handled externally). */
+static s32 range_reduce_rad(s32 r)
+{
+ r = reduce_to_2pi(r);
+ if (r > FR_PI(16))
+ r -= FR_TWO_PI(16);
+ return r;
+}
+
+/* fr_rad_to_bam — overflow-safe radian to u16 BAM conversion.
+ * Normalizes to r16, reduces via positive-only path, applies shift-only multiply.
+ * Handles inputs beyond ±2*pi with modulus (slow path). */
+u16 fr_rad_to_bam(s32 rad, u16 radix)
+{
+    s32 r = normalize_to_r16(rad, radix);
+    /* range_reduce_rad needs r >= 0: use bam(-x) = -bam(x) mod 65536 and let u16 wrap. */
+    s32 sign = 1;
+    if (r < 0) { r = -r; sign = -1; }
+    r = range_reduce_rad(r);
+    s32 bam_full = rad_to_bam_full(r);
+    if (sign < 0) bam_full = -bam_full;
+    /* Round in u32: near r == pi bam_full is ~2^31 - 4K, so adding 2^15 in s32 overflows. */
+    return (u16)(((u32)bam_full + (1u << 15)) >> 16);
+}
+
+/* fr_deg_to_bam — overflow-safe degree to u16 BAM conversion.
+ * Normalizes to r16, reduces to [-90, 90) with quadrant offset. */
+u16 fr_deg_to_bam(s32 deg, u16 radix)
+{
+ s32 d = normalize_to_r16(deg, radix);
+
+ /* Reduce to [-180, 180) */
+ if (d >= FR_D360_R16 || d < -FR_D360_R16) {
+ s32 n = d / FR_D360_R16;
+ d -= n * FR_D360_R16;
+ }
+ if (d >= FR_D180_R16) d -= FR_D360_R16;
+ if (d < -FR_D180_R16) d += FR_D360_R16;
+
+ /* Reduce to [-90, 90) with BAM quadrant offset */
+ u16 offset = 0;
+ if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = 32768; }
+ else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = 32768; }
+
+ return (u16)(offset + (u16)((deg_to_bam_full(d) + (1 << 15)) >> 16));
+}
+
+/*=======================================================
+ * BAM-native trig: fr_sin_bam, fr_cos_bam, fr_cos, fr_sin, fr_tan
+ *
+ * Internal model: every angle is reduced to a u16 BAM value. The top 2 bits
+ * select the quadrant, the bottom 14 bits are the in-quadrant position. Odd
+ * quadrants (1, 3) reverse the in-quadrant index so the table is always read
+ * in the same direction.
+ *
+ * The table is a 129-entry SINE quadrant (ascending: 0 at index 0, 32768 at
+ * index 128). After mirroring, small full_pos → small output (near zero),
+ * which enables a cheap small-angle approximation: sin(θ) ≈ θ for angles
+ * below one table step (~0.7°). This eliminates table quantization error
+ * in the region where it matters most.
+ *
+ * Sign rule: quadrants 2 and 3 negate the result.
+ * Mirror rule: quadrants 1 and 3 flip the in-quadrant position.
+ */
+s32 fr_sin_bam(u16 bam)
+{
+ u32 q = ((u32)bam >> 14) & 0x3; /* top 2 bits = quadrant */
+ u32 inq = (u32)bam & (FR_TRIG_QUADRANT - 1); /* bottom 14 bits */
+
+ /* Exact cardinal angles */
+ if (inq == 0) {
+ if (q == 0 || q == 2) return 0; /* 0° or 180° → 0 */
+ if (q == 1) return FR_TRIG_ONE; /* 90° → 1.0 */
+ return -FR_TRIG_ONE; /* 270° → -1.0 */
+ }
+
+ /* Odd quadrants mirror: read table from the far end */
+ if (q == 1 || q == 3)
+ inq = FR_TRIG_QUADRANT - inq;
+
+ s32 v;
+
+ /* Small-angle approximation: sin(θ) ≈ θ for inq < 128 (one table step).
+ * θ_rad = inq * (π/2) / 16384. Output = θ * 65536 = inq * FR_kQ2RAD / 16384.
+ * Max inq=127: 127 * 102944 / 16384 = 798. Error: θ³/6 < 3e-7 << 1 LSB. */
+ if (inq < FR_TRIG_FRAC_MAX) {
+ v = (s32)(((u32)inq * 102944u + 8192u) >> 14);
+ } else {
+ /* Table lookup with 7-bit interpolation fraction */
+ u32 idx = inq >> FR_TRIG_FRAC_BITS;
+ u32 frac = inq & FR_TRIG_FRAC_MASK;
+ s32 lo = (s32)gFR_SIN_TAB_Q[idx];
+ s32 hi = (s32)gFR_SIN_TAB_Q[idx + 1];
+ v = lo + (((hi - lo) * (s32)frac + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS);
+ v <<= 1; /* u0.15 → s15.16 */
+ }
+
+ return (q >= 2) ? -v : v;
+}
+
+s32 fr_cos_bam(u16 bam)
+{
+ /* cos(x) = sin(x + pi/2) = sin(bam + 16384). u16 wraparound is free. */
+ return fr_sin_bam((u16)(bam + FR_BAM_QUADRANT));
+}
+
+s32 fr_cos(s32 rad, u16 radix)
+{
+ if (rad == 0) return FR_TRIG_ONE;
+ s32 r = normalize_to_r16(rad, radix);
+ if (r < 0) r = -r;
+ r = reduce_to_2pi(r);
+ /* Near π/2 or 3π/2 (cos=0 crossings): cos(π/2+δ) = -sin(δ) ≈ -δ,
+ * cos(3π/2+δ) = sin(δ) ≈ δ. */
+ s32 delta = r - FR_HALF_PI(16);
+ if (delta >= -256 && delta <= 256)
+ return -delta;
+ delta = r - FR_THREE_HALF_PI(16);
+ if (delta >= -256 && delta <= 256)
+ return delta;
+ return fr_cos_bam(fr_rad_to_bam(rad, radix));
+}
+
+s32 fr_sin(s32 rad, u16 radix)
+{
+ if (rad == 0) return 0;
+ s32 r = normalize_to_r16(rad, radix);
+ s32 sign = 1;
+ if (r < 0) { r = -r; sign = -1; }
+ r = reduce_to_2pi(r);
+ /* Near 0 after reduction: sin(δ) ≈ δ */
+ if (r < 256) {
+ s32 v = r;
+ return (sign < 0) ? -v : v;
+ }
+ /* Near π: sin(π + δ) = -sin(δ) ≈ -δ */
+ s32 delta = r - FR_PI(16);
+ if (delta >= -256 && delta <= 256) {
+ s32 v = -delta;
+ return (sign < 0) ? -v : v;
+ }
+ /* Near 2π: sin(2π - δ) = -sin(δ) ≈ -δ, but δ = 2π - r */
+ delta = FR_TWO_PI(16) - r;
+ if (delta >= 0 && delta < 256) {
+ s32 v = -delta;
+ return (sign < 0) ? -v : v;
+ }
+ /* Main path: reduce to [-π, π], convert to u16 BAM, table lookup */
+ if (r > FR_PI(16)) r -= FR_TWO_PI(16);
+ u16 bam = (u16)((rad_to_bam_full(r) + (1 << 15)) >> 16);
+ s32 v = fr_sin_bam(bam);
+ return (sign < 0) ? -v : v;
+}
+
+/*=======================================================
+ * BAM-native tangent: fr_tan_bam
+ *
+ * Uses a 65-entry octant table (gFR_TAN_TAB_O) for the first octant
+ * [0, 45°] and the reciprocal identity tan(x) = 1/tan(90°-x) for the
+ * second octant (45°, 90°). Result is s15.16 with saturation at the
+ * poles.
+ *
+ * No 64-bit intermediates. One 32-bit division only in the >45° path.
+ */
+s32 fr_tan_bam(u16 bam)
+{
+    u32 q = ((u32)bam >> 14) & 0x3; /* quadrant (top 2 bits) */
+    u32 inq = (u32)bam & 0x3FFFu; /* in-quadrant (14 bits) */
+    s32 sign = 1;
+    u32 idx, frac;
+    s32 lo, hi, raw;
+
+    /* Exact zeros: bam lands exactly on 0° or 180° */
+    if (inq == 0 && (q == 0 || q == 2))
+        return 0;
+
+    /* Poles: bam lands exactly on 90° or 270° */
+    if (inq == 0 && (q == 1 || q == 3))
+        return (q == 1) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+
+    /* Q1 (90°..180°) and Q3 (270°..360°): reflect and negate */
+    if (q == 1 || q == 3) {
+        inq = 0x4000u - inq;
+        sign = -1;
+    }
+
+    /* Now inq is in (0, 0x4000) = (0°, 90°) exclusive.
+     * Split into first octant [0, 45°) and second octant [45°, 90°). */
+    if (inq < FR_TAN_OCTANT) {
+        /* First octant: direct table lookup + lerp.
+         * inq is 13 bits; top FR_TAN_TABLE_BITS index the table,
+         * bottom FR_TAN_FRAC_BITS drive interpolation. */
+        idx = inq >> FR_TAN_FRAC_BITS;
+        frac = inq & FR_TAN_FRAC_MASK;
+        lo = (s32)gFR_TAN_TAB_O[idx];
+        hi = (s32)gFR_TAN_TAB_O[idx + 1];
+        raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+
+        if (raw < 0x40) {
+            /* Near zero: redo interpolation with 4 extra bits of
+             * precision to reduce rounding error when result is small. */
+            s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4;
+            s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4;
+            raw = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+            raw = (raw + 4) >> 3; /* u0.19 → s15.16 with rounding */
+        } else {
+            raw <<= 1; /* u0.15 → s15.16 */
+        }
+    } else {
+        /* Second octant: tan(x) = 1 / tan(90° - x).
+         * complement is in (0, 0x2000] = (0°, 45°]. */
+        u32 comp = 0x4000u - inq;
+
+        /* Look up tan(complement); at exactly 45° comp == 0x2000, the last table index, with frac == 0 */
+        idx = comp >> FR_TAN_FRAC_BITS;
+        frac = comp & FR_TAN_FRAC_MASK;
+        lo = (s32)gFR_TAN_TAB_O[idx];
+        hi = frac ? (s32)gFR_TAN_TAB_O[idx + 1] : lo; /* frac == 0 never uses hi: skip the read past the last entry */
+        raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+
+        if (raw < 0x40) {
+            /* Near pole: redo interpolation with 4 extra bits of
+             * precision. The reciprocal amplifies small interpolation
+             * errors, so extra precision significantly helps here.
+             * Result: (2^31 / raw_hp) << 4 = 2^35 / raw_hp. */
+            s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4;
+            s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4;
+            s32 raw_hp = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS);
+            if (raw_hp < 32) {
+                raw = FR_TRIG_MAXVAL;
+            } else {
+                raw = (s32)((0x80000000u / (u32)raw_hp) << 4);
+            }
+        } else {
+            raw = (s32)(0x80000000u / (u32)raw);
+        }
+    }
+
+    return (sign < 0) ? -raw : raw;
+}
+
+/* fr_tan — radian-input tangent with full sub-BAM precision.
+ *
+ * Goes directly to the 65-entry octant tangent table with 16-bit
+ * interpolation precision. Sign from quadrant, magnitude from table.
+ * No s64 intermediates. One 32-bit division in the second-octant path.
+ *
+ * Architecture:
+ * 1. Sign: determined by the 0-based BAM quadrant q (q=0,2 positive; q=1,3 negative)
+ * 2. Magnitude: from octant table lookup + reciprocal identity
+ * - First octant [0,45°): direct table lerp
+ * - Second octant [45°,90°): 1/tan(90°-x) via reciprocal
+ * 3. Return sign * magnitude */
+
+/* Internal: given a full s32 BAM, compute |tan| directly from the table.
+ * Returns the unsigned magnitude (always >= 0). */
+static s32 tan_mag_from_bam_full(s32 bam_full)
+{
+    u16 bam0 = (u16)(bam_full >> 16);
+    u32 frac_sub = (u32)bam_full & 0xFFFFu;
+
+    u32 q = ((u32)bam0 >> 14) & 0x3u;
+    u32 inq = (u32)bam0 & 0x3FFFu;
+
+    /* Exact zeros: tan(0°) = tan(180°) = 0 */
+    if (inq == 0 && frac_sub == 0 && (q == 0 || q == 2))
+        return 0;
+
+    /* Exact poles: tan(90°) = tan(270°) → saturate */
+    if (inq == 0 && frac_sub == 0 && (q == 1 || q == 3))
+        return FR_TRIG_MAXVAL;
+
+    /* Mirror odd quadrants (Q1, Q3) into the [0, 90°) range.
+     * After this, full_pos represents distance from the nearest zero. */
+    u32 full_pos;
+    if (q == 1 || q == 3)
+        full_pos = ((u32)(0x4000u - inq) << 16) - frac_sub;
+    else
+        full_pos = ((u32)inq << 16) + frac_sub;
+
+    /* Split at octant boundary (45° = 8192 BAM = 8192*65536 sub-BAM) */
+    s32 raw;
+    if (full_pos < ((u32)FR_TAN_OCTANT << 16)) {
+        /* First octant [0, 45°): direct table lookup.
+         * 64 table intervals, each 2^23 sub-BAM units wide. */
+        u32 idx = full_pos >> 23;
+        u32 frac16 = (full_pos >> 7) & 0xFFFFu;
+
+        s32 lo = (s32)gFR_TAN_TAB_O[idx];
+        s32 hi = (s32)gFR_TAN_TAB_O[idx + 1];
+        raw = lo + (s32)(((s32)(hi - lo) * (s32)frac16 + (1 << 15)) >> 16);
+
+        if (raw < 0x40) {
+            /* Near zero: redo with 4 extra bits of precision */
+            s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4;
+            s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4;
+            raw = lo4 + (s32)(((s32)(hi4 - lo4) * (s32)frac16 + (1 << 15)) >> 16);
+            raw = (raw + 4) >> 3; /* u0.19 → s15.16 with rounding */
+        } else {
+            raw <<= 1; /* u0.15 → s15.16 */
+        }
+    } else {
+        /* Second octant [45°, 90°): tan(x) = 1 / tan(90° - x).
+         * Complement = distance from pole; exactly 45° gives comp == 2^29 (idx 64, frac16 0). */
+        u32 comp = ((u32)FR_TRIG_QUADRANT << 16) - full_pos;
+
+        u32 idx = comp >> 23;
+        u32 frac16 = (comp >> 7) & 0xFFFFu;
+
+        s32 lo = (s32)gFR_TAN_TAB_O[idx];
+        s32 hi = frac16 ? (s32)gFR_TAN_TAB_O[idx + 1] : lo; /* frac16 == 0 never uses hi: skip the read past the last entry */
+        raw = lo + (s32)(((s32)(hi - lo) * (s32)frac16 + (1 << 15)) >> 16);
+
+        if (raw < 0x40) {
+            /* Near pole: redo with 4 extra bits then reciprocal */
+            s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4;
+            s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4;
+            s32 raw_hp = lo4 + (s32)(((s32)(hi4 - lo4) * (s32)frac16 + (1 << 15)) >> 16);
+            if (raw_hp < 32)
+                raw = FR_TRIG_MAXVAL;
+            else
+                raw = (s32)((0x80000000u / (u32)raw_hp) << 4);
+        } else {
+            raw = (s32)(0x80000000u / (u32)raw);
+        }
+    }
+    return raw;
+}
+
+s32 fr_tan(s32 rad, u16 radix)
+{
+ if (rad == 0) return 0;
+ /* tan(-x) = -tan(x): factor out sign, reduce positive */
+ s32 r = normalize_to_r16(rad, radix);
+ s32 tan_sign = 1;
+ if (r < 0) { r = -r; tan_sign = -1; }
+ r = reduce_to_2pi(r);
+ /* Near-π small angle: tan(π + δ) = tan(δ) ≈ δ. */
+ s32 delta = r - FR_PI(16);
+ if (delta >= -256 && delta <= 256) {
+ return (tan_sign < 0) ? -delta : delta;
+ }
+ /* Full pipeline */
+ if (r > FR_PI(16))
+ r -= FR_TWO_PI(16);
+ s32 bam_full = rad_to_bam_full(r);
+
+ /* Sign from quadrant of the BAM position */
+ u32 q = ((u32)((u16)(bam_full >> 16)) >> 14) & 0x3u;
+ s32 sign = (q == 1 || q == 3) ? -tan_sign : tan_sign;
+
+ s32 mag = tan_mag_from_bam_full(bam_full);
+ return (sign < 0) ? -mag : mag;
+}
+
+/*=======================================================
+ * Degree-input trig: convert to u16 BAM via fr_deg_to_bam, then
+ * call the BAM-native functions. Cardinal angles are exact.
+ */
+
+s32 fr_cos_deg(s32 deg, u16 radix)
+{
+ if (radix == 0) return fr_cos_bam(FR_DEG2BAM_I(deg));
+ if (deg < 0) deg = -deg;
+ /* Exact cardinal angles */
+ s32 frac_mask = (1 << radix) - 1;
+ if ((deg & frac_mask) == 0) {
+ s32 rem = (deg >> radix) % 360;
+ if (rem == 0) return FR_TRIG_ONE;
+ if (rem == 90) return 0;
+ if (rem == 180) return -FR_TRIG_ONE;
+ if (rem == 270) return 0;
+ }
+ /* Near 90° or 270° (cos=0 crossings): cos(90+δ) = -sin(δ) ≈ -δ·π/180,
+ * cos(270+δ) = sin(δ) ≈ δ·π/180. Avoids BAM rounding error at zero. */
+ s32 d = normalize_to_r16(deg, radix);
+ if (d >= FR_D360_R16) { s32 n = d / FR_D360_R16; d -= n * FR_D360_R16; }
+ {
+ const s32 DEG_THRESH = 14000; /* ~0.21° at r16 */
+ s32 delta = d - FR_D90_R16;
+ if (delta >= -DEG_THRESH && delta <= DEG_THRESH) {
+ s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16);
+ return -dr;
+ }
+ delta = d - (FR_D90_R16 + FR_D180_R16);
+ if (delta >= -DEG_THRESH && delta <= DEG_THRESH) {
+ s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16);
+ return dr;
+ }
+ }
+ return fr_cos_bam(fr_deg_to_bam(deg, radix));
+}
+
+s32 fr_sin_deg(s32 deg, u16 radix)
+{
+ if (radix == 0) return fr_sin_bam(FR_DEG2BAM_I(deg));
+ s32 sign = 1;
+ if (deg < 0) { deg = -deg; sign = -1; }
+ /* Exact cardinal angles */
+ s32 frac_mask = (1 << radix) - 1;
+ if ((deg & frac_mask) == 0) {
+ s32 rem = (deg >> radix) % 360;
+ if (rem == 0) return 0;
+ if (rem == 90) return (sign < 0) ? -FR_TRIG_ONE : FR_TRIG_ONE;
+ if (rem == 180) return 0;
+ if (rem == 270) return (sign < 0) ? FR_TRIG_ONE : -FR_TRIG_ONE;
+ }
+ s32 v = fr_sin_bam(fr_deg_to_bam(deg, radix));
+ return (sign < 0) ? -v : v;
+}
+
+s32 FR_TanI(s32 deg)
+{
+ /* Exact pole: deg mod 180 == ±90. Sign matches input sign. */
+ s32 rem = deg % 180;
+ if (rem == 90 || rem == -90)
+ return (deg > 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+ return fr_tan_bam(FR_DEG2BAM_I(deg));
+}
+
+/* Internal: range-reduce degrees and produce full s32 BAM (used by fr_tan_deg). */
+static s32 range_reduce_deg_bam_full(s32 deg, u16 radix)
+{
+    s32 d = normalize_to_r16(deg, radix);
+    if (d >= FR_D360_R16) {
+        s32 n = d / FR_D360_R16;
+        d -= n * FR_D360_R16;
+    }
+    if (d >= FR_D180_R16) d -= FR_D360_R16;
+    u32 offset = 0; /* half-turn (0x80000000) added when d is folded into [-90, 90) */
+    if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = 0x80000000u; }
+    else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = 0x80000000u; }
+    return (s32)(offset + (u32)deg_to_bam_full(d)); /* u32 add wraps; INT_MIN + negative in s32 is UB */
+}
+
+s32 fr_tan_deg(s32 deg, u16 radix)
+{
+ if (radix == 0) return FR_TanI(deg);
+ /* tan(-x) = -tan(x): factor out sign, reduce positive */
+ s32 tan_sign = 1;
+ if (deg < 0) { deg = -deg; tan_sign = -1; }
+ /* Exact cardinal angles: tan is exactly 0 or ±MAXVAL */
+ s32 frac_mask = (1 << radix) - 1;
+ if ((deg & frac_mask) == 0) {
+ s32 deg_int = deg >> radix;
+ s32 rem = deg_int % 180;
+ if (rem == 0) return 0;
+ if (rem == 90) return tan_sign > 0 ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL;
+ }
+ s32 bam_full = range_reduce_deg_bam_full(deg, radix);
+ u32 q = ((u32)((u16)(bam_full >> 16)) >> 14) & 0x3u;
+ s32 sign = (q == 1 || q == 3) ? -tan_sign : tan_sign;
+ s32 mag = tan_mag_from_bam_full(bam_full);
+ return (sign < 0) ? -mag : mag;
+}
+
+/*=======================================================
+ * FR_FixMuls (x*y signed, NOT saturated, round-to-nearest)
+ *
+ * Treats x and y as fixed-point values at radix 16 (s15.16) and returns
+ * (x*y) >> 16, also at radix 16. The user is responsible for tracking the
+ * radix point and for guaranteeing the shifted result fits in 32 bits.
+ *
+ * Adds 0.5 LSB (0x8000) before the shift so the result rounds to
+ * nearest instead of truncating toward zero.
+ */
+s32 FR_FixMuls(s32 x, s32 y)
+{
+ int64_t v = (int64_t)x * (int64_t)y;
+ return (s32)((v + 0x8000) >> 16);
+}
+
+/*=======================================================
+ * FR_FixMulSat (x*y signed, SATURATED, round-to-nearest)
+ *
+ * Same semantics as FR_FixMuls but clamps to [INT32_MIN, INT32_MAX] on
+ * overflow instead of wrapping. The fixed-point radix is fixed at 16 bits
+ * (sM.16 inputs and output). Rounds to nearest (adds 0.5 LSB before shift).
+ */
+s32 FR_FixMulSat(s32 x, s32 y)
+{
+ int64_t v = ((int64_t)x * (int64_t)y + 0x8000) >> 16;
+ if (v > (int64_t)0x7fffffff) return FR_OVERFLOW_POS;
+ if (v < -(int64_t)0x80000000) return FR_OVERFLOW_NEG;
+ return (s32)v;
+}
+
+/*=======================================================
+ FR_FixAddSat (x+y saturated add)
+ programmer must align radix points before using this function
+ */
+s32 FR_FixAddSat(s32 x, s32 y)
+{
+    /* u32 addition wraps (signed overflow is UB); the wrapped sign reveals overflow. */
+    s32 sum = (s32)((u32)x + (u32)y);
+    if (x < 0)
+    {
+        if (y < 0 && sum >= 0) /* negative + negative wrapped to >= 0 */
+            return FR_OVERFLOW_NEG;
+    }
+    else if (y >= 0 && sum < 0) /* 0 + 0 == 0 is a valid sum, not overflow */
+    {
+        return FR_OVERFLOW_POS;
+    }
+    return sum;
+}
+
+/* Inverse Trig
+ * acos with binary search of the BAM-native quadrant table.
+ *
+ * Algorithm: bring `input` into s0.15, then binary-search the first-quadrant
+ * sine table for the entries bracketing |input| (giving asin), convert with
+ * acos = pi/2 - asin, and apply the quadrant mirror if input was negative.
+ */
+/* FR_acos — returns radians at out_radix.
+ * Range: [0, pi]. Input is a cosine value at the given radix.
+ *
+ * Uses the 129-entry sine table in reverse: binary-search the ascending
+ * table to find asin(|input|), then acos = pi/2 - asin (with sign handling
+ * for the second quadrant).
+ */
+s32 FR_acos(s32 input, u16 radix, u16 out_radix)
+{
+ s32 v;
+ s16 sign;
+ s32 lo, hi, mid;
+ s32 idx, d, num, frac;
+ s32 input_abs;
+
+ /* Work with absolute value at the caller's radix */
+ sign = (s16)((input < 0) ? 1 : 0);
+ input_abs = sign ? -input : input;
+
+ /* Clamp at the caller's radix */
+ {
+ s32 one = (s32)1 << radix;
+ if (input_abs >= one)
+ return sign ? FR_CHRDX(FR_kPI, FR_kPREC, out_radix) : 0;
+ }
+
+ v = FR_CHRDX(input_abs, radix, FR_TRIG_PREC); /* |input| at s0.15 */
+
+ /* Small-angle fast path: when cos(θ) is close to 1.0, the sine table
+ * has poor resolution near the top (entries close together).
+ * Use acos(x) ≈ sqrt(2*(1-x)) instead. Threshold: v > sin_tab[121]
+ * means the input is > cos(7*π/256) ≈ 0.9963. */
+ if (v > gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE - 8])
+ {
+ s32 one = (s32)1 << radix;
+ s32 one_minus_x = one - input_abs; /* 1-|x| at caller radix */
+ s32 two_omx = one_minus_x << 1; /* 2(1-|x|) at caller radix */
+ s32 rad_native = FR_sqrt(two_omx, radix); /* radians at caller radix */
+ s32 rad_out = FR_CHRDX(rad_native, radix, out_radix);
+ if (sign)
+ rad_out = FR_CHRDX(FR_kPI, FR_kPREC, out_radix) - rad_out;
+ return rad_out;
+ }
+
+ /* Binary search on the ascending sine table.
+ * gFR_SIN_TAB_Q[0] = 0 (sin 0°), gFR_SIN_TAB_Q[128] = 32768 (sin 90°).
+ *
+ * Find the first index where table[idx] >= v. */
+ lo = 0;
+ hi = FR_TRIG_TABLE_SIZE;
+ while (lo < hi)
+ {
+ mid = (lo + hi) >> 1;
+ if ((s32)gFR_SIN_TAB_Q[mid] < v)
+ lo = mid + 1;
+ else
+ hi = mid;
+ }
+
+ /* lo is now the first index where table[lo] >= v.
+ * The bracketing interval is [lo-1, lo] with table[lo-1] < v <= table[lo].
+ * This gives us the asin angle; acos = pi/2 - asin. */
+ idx = lo;
+ if (idx <= 0)
+ {
+ idx = 0;
+ frac = 0;
+ }
+ else if (idx >= FR_TRIG_TABLE_SIZE)
+ {
+ idx = FR_TRIG_TABLE_SIZE - 1;
+ frac = 0;
+ }
+ else
+ {
+ /* Interpolate between table[idx-1] and table[idx].
+ * d = table[idx] - table[idx-1] (>= 0, sin increasing)
+ * num = v - table[idx-1] (how far past table[idx-1])
+ */
+ d = (s32)gFR_SIN_TAB_Q[idx] - (s32)gFR_SIN_TAB_Q[idx - 1];
+ num = v - (s32)gFR_SIN_TAB_Q[idx - 1];
+ if (d > 0)
+ frac = ((num << FR_TRIG_FRAC_BITS) + (d >> 1)) / d;
+ else
+ frac = 0;
+ idx = idx - 1;
+ }
+
+ {
+ /* asin_bam is the angle in first-quadrant BAM whose sin = v */
+ u16 asin_bam = (u16)(((u32)idx << FR_TRIG_FRAC_BITS) + (u32)frac);
+ /* acos = pi/2 - asin (in BAM: quadrant - asin_bam) */
+ u16 bam = (u16)(FR_TRIG_QUADRANT - asin_bam);
+ if (sign)
+ bam = (u16)(FR_BAM_HALF - bam); /* mirror: pi - angle */
+ return FR_CHRDX(FR_Q2RAD(bam), 14, out_radix);
+ }
+}
+
+/* FR_asin — returns radians at out_radix. Range: [-pi/2, pi/2]. */
+s32 FR_asin(s32 input, u16 radix, u16 out_radix)
+{
+ /* asin(x) = pi/2 - acos(x) */
+ s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix);
+ return half_pi - FR_acos(input, radix, out_radix);
+}
+
+/* FR_atan2(y, x, out_radix) — full-circle arctangent, returns radians
+ * at the specified output radix (s32).
+ *
+ * Range: [-pi, pi]. Returns 0 for atan2(0,0).
+ *
+ * Implementation: normalise (x,y) via FR_hypot_fast8, then recover the
+ * angle with FR_asin or FR_acos (both use the 129-entry sine table).
+ * To stay in the well-conditioned region of each inverse function we
+ * switch at 45°:
+ * |y| <= |x| → use asin(y/h) — asin stable near 0
+ * |y| > |x| → use acos(x/h) — acos stable near pi/2
+ * This keeps the derivative amplification factor below 1.414x everywhere.
+ */
+s32 FR_atan2(s32 y, s32 x, u16 out_radix)
+{
+ s32 ax, ay, h, q1_angle;
+
+ /* Axis cases — exact angles, no divide. */
+ if (x == 0)
+ {
+ if (y > 0) return FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); /* pi/2 */
+ if (y < 0) return -FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); /* -pi/2 */
+ return 0;
+ }
+ if (y == 0)
+ return (x > 0) ? 0 : FR_CHRDX(FR_kPI, FR_kPREC, out_radix); /* 0 or pi */
+
+ ax = (x < 0) ? -x : x;
+ ay = (y < 0) ? -y : y;
+
+ /* Normalise so max(ax,ay) sits in [2^14, 2^16). This gives
+ * FR_hypot_fast8 enough integer bits for the shift-only segments
+ * to produce an accurate ratio — critical when the raw inputs are
+ * small (e.g. atan2(1,1) at radix 0). Scaling both by the same
+ * power of two doesn't change the angle. */
+ {
+ s32 mx = (ax > ay) ? ax : ay;
+ while (mx < (1L << 14)) { ax <<= 1; ay <<= 1; mx <<= 1; }
+ while (mx >= (1L << 16)) { ax >>= 1; ay >>= 1; mx >>= 1; }
+ }
+
+ h = FR_hypot_fast8((s32)ax, (s32)ay);
+ if (h == 0) return 0; /* degenerate */
+
+ /* Compute the first-quadrant angle (positive, [0..pi/2]).
+ * Divide produces a value in [0,1] at radix FR_TRIG_PREC (s0.15).
+ *
+ * Small-angle fast path: when the minor-axis ratio is small,
+ * asin(x) ≈ x (error < x³/6). Below ~5° the cubic term is
+ * smaller than the table-lookup error, so the direct identity
+ * is both faster and more accurate. Threshold 2753 at r15
+ * corresponds to sin(~4.8°) = 0.084. */
+ #define FR_ATAN2_SMALL 2753
+ if (ay <= ax)
+ {
+ /* angle in [0°..45°]: use asin(ay/h) — well-conditioned near 0 */
+ s32 sin_val = (s32)(((int64_t)ay << FR_TRIG_PREC) / h);
+ if (sin_val < FR_ATAN2_SMALL)
+ q1_angle = FR_CHRDX(sin_val, FR_TRIG_PREC, out_radix);
+ else
+ q1_angle = FR_asin(sin_val, FR_TRIG_PREC, out_radix);
+ }
+ else
+ {
+ /* angle in [45°..90°]: use acos(ax/h) — well-conditioned near pi/2 */
+ s32 cos_val = (s32)(((int64_t)ax << FR_TRIG_PREC) / h);
+ if (cos_val < FR_ATAN2_SMALL)
+ {
+ /* angle ≈ pi/2 - cos_val (symmetric small-angle identity) */
+ s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix);
+ q1_angle = half_pi - FR_CHRDX(cos_val, FR_TRIG_PREC, out_radix);
+ }
+ else
+ q1_angle = FR_acos(cos_val, FR_TRIG_PREC, out_radix);
+ }
+
+ /* Apply quadrant from signs of x and y.
+ * q1_angle is always positive [0..pi/2]. */
+ {
+ s32 pi = FR_CHRDX(FR_kPI, FR_kPREC, out_radix);
+ if (x > 0)
+ return (y > 0) ? q1_angle : -q1_angle;
+ /* x < 0: mirror across y-axis */
+ return (y > 0) ? (pi - q1_angle) : (q1_angle - pi);
+ }
+}
+
+/* FR_atan(input, radix, out_radix) — arctangent of a single argument.
+ * Returns radians at out_radix, range [-pi/2, pi/2].
+ */
+s32 FR_atan(s32 input, u16 radix, u16 out_radix)
+{
+ s32 one = (s32)1 << radix;
+ return FR_atan2(input, one, out_radix);
+}
+
+/* 2^f table for f in [0, 1] in 65 entries (64 segments), output in s.16
+ * fixed point. Entry i = round(2^(i/64) * 65536). Size: 260 bytes.
+ * Used by FR_pow2 to look up the fractional power of 2 with linear
+ * interpolation.
+ */
+static const u32 gFR_POW2_FRAC_TAB[65] = {
+ 65536, 66250, 66971, 67700, 68438, 69183, 69936, 70698,
+ 71468, 72246, 73032, 73828, 74632, 75444, 76266, 77096,
+ 77936, 78785, 79642, 80510, 81386, 82273, 83169, 84074,
+ 84990, 85915, 86851, 87796, 88752, 89719, 90696, 91684,
+ 92682, 93691, 94711, 95743, 96785, 97839, 98905, 99982,
+ 101070, 102171, 103283, 104408, 105545, 106694, 107856, 109031,
+ 110218, 111418, 112631, 113858, 115098, 116351, 117618, 118899,
+ 120194, 121502, 122825, 124163, 125515, 126882, 128263, 129660,
+ 131072
+};
+
+/* FR_pow2(input, radix) — computes 2^(input/2^radix), result at same radix.
+ *
+ * Algorithm: split input into floor(integer) and fractional part. The
+ * fractional part is in [0, 1) by construction (Euclidean / mathematical
+ * floor — the fractional part of -2.3 is +0.7, not -0.3). Then
+ * 2^(int + frac) = 2^int * 2^frac
+ * where 2^frac is looked up from a 65-entry table at radix 16, and 2^int
+ * is a shift.
+ *
+ * Worst-case absolute error: ~1e-5 over [-8, 8] (65-entry table).
+ * Linear interpolation leaves a small concavity error in each interval.
+ */
+s32 FR_pow2(s32 input, u16 radix)
+{
+    s32 flr, frac_full, idx, frac_lo, lo, hi, mant, result;
+    u32 mask = (radix > 0) ? (((u32)1 << radix) - 1) : 0;
+
+    /* Mathematical floor: for positive input it's input>>radix; for
+     * negative input we need to round toward -infinity, not toward zero.
+     */
+    if (input >= 0)
+    {
+        flr = (s32)((u32)input >> radix);
+        frac_full = (s32)((u32)input & mask);
+    }
+    else
+    {
+        s32 neg = -input;
+        s32 nflr = (s32)((u32)neg >> radix);
+        s32 nfrc = (s32)((u32)neg & mask);
+        if (nfrc == 0)
+        {
+            flr = -nflr;
+            frac_full = 0;
+        }
+        else
+        {
+            flr = -nflr - 1; /* floor toward -inf */
+            frac_full = (s32)((1L << radix) - nfrc);
+        }
+    }
+
+    /* frac_full is in [0, 2^radix). Re-radix it to s.16 for table lookup. */
+    if (radix > 16)
+        frac_full >>= (radix - 16);
+    else if (radix < 16)
+        frac_full <<= (16 - radix);
+    /* now frac_full is in [0, 65536) representing fractional in s.16. */
+
+    /* Top 6 bits index the table; bottom 10 are the interpolation fraction. */
+    idx = frac_full >> 10;
+    frac_lo = frac_full & ((1L << 10) - 1);
+    lo = (s32)gFR_POW2_FRAC_TAB[idx];
+    hi = (s32)gFR_POW2_FRAC_TAB[idx + 1];
+    mant = lo + (((hi - lo) * frac_lo) >> 10); /* mant in s.16, in [1.0, 2.0) */
+
+    /* Apply integer shift. mant is at radix 16. We want output at `radix`.
+     * mant >= 2^16, so mant << flr leaves s32 once flr >= 15; the positive
+     * branch therefore shifts in 64 bits and saturates after re-radixing.
+     */
+    if (flr >= 0)
+    {
+        int64_t wide; /* holds mant << flr (< 2^46) and its re-radixed value */
+        if (flr >= 30)
+            return FR_OVERFLOW_POS;
+        wide = FR_CHRDX((int64_t)mant << flr, 16, radix);
+        return (wide > (int64_t)0x7fffffff) ? FR_OVERFLOW_POS : (s32)wide;
+    }
+    else
+    {
+        /* mant >> -flr at radix 16, then re-radix. */
+        s32 sh = -flr;
+        if (sh >= 30)
+            return 0; /* underflow */
+        result = mant >> sh;
+        return FR_CHRDX(result, 16, radix);
+    }
+}
+
+/* log2 mantissa table for m in [1, 2), m = 1 + i/64, returning log2(m)
+ * in s.16 fixed point. 65 entries (last is log2(2) = 1.0 = 65536) so the
+ * interpolation between idx and idx+1 never reads out of bounds.
+ * Size: 260 bytes. Entry i = round(log2(1 + i/64) * 65536).
+ */
+static const u32 gFR_LOG2_MANT_TAB[65] = {
+ 0, 1466, 2909, 4331, 5732, 7112, 8473, 9814,
+ 11136, 12440, 13727, 14996, 16248, 17484, 18704, 19909,
+ 21098, 22272, 23433, 24579, 25711, 26830, 27936, 29029,
+ 30109, 31178, 32234, 33279, 34312, 35334, 36346, 37346,
+ 38336, 39316, 40286, 41246, 42196, 43137, 44068, 44990,
+ 45904, 46809, 47705, 48593, 49472, 50344, 51207, 52063,
+ 52911, 53751, 54584, 55410, 56229, 57040, 57845, 58643,
+ 59434, 60219, 60997, 61769, 62534, 63294, 64047, 64794,
+ 65536
+};
+
+/* FR_log2(input, radix, output_radix) — log base 2 of a fixed-point number.
+ *
+ * input : value to take log2 of, treated as a positive sM.radix value.
+ * radix : number of fractional bits in `input`.
+ * output_radix : number of fractional bits in the result.
+ *
+ * Returns FR_LOG2MIN for input <= 0 (log of zero/negative is undefined; we
+ * return a large negative sentinel rather than crash).
+ *
+ * Algorithm:
+ * 1. Find p, the position of the leading 1 bit of `input`.
+ * log2(input) = p + log2(input / 2^p), where the second term is in
+ * [0, 1) because (input / 2^p) is in [1, 2).
+ * 2. Normalize the mantissa to s1.30 by shifting `input` so its top bit
+ * sits at bit 30 (so bits 29..24 are the upper 6 bits of m-1).
+ * 3. Look up log2(m) in the 65-entry table with linear interpolation
+ * across the next 24 bits. Result is in s.16.
+ * 4. integer_part = (p - radix), then result = (integer_part << 16) +
+ * mantissa_log2.
+ * 5. Re-radix to the requested output_radix via FR_CHRDX.
+ *
+ * Worst-case absolute error: ~6e-5 in log2 units (65-entry table).
+ */
+s32 FR_log2(s32 input, u16 radix, u16 output_radix)
+{
+ s32 p, integer_part, idx, frac, lo, hi, mant_log2, result;
+ u32 m, u;
+
+ if (input <= 0)
+ return FR_LOG2MIN;
+
+ /* Step 1: find the position of the leading 1 bit. */
+ u = (u32)input;
+ p = 0;
+ while (u > 1)
+ {
+ u >>= 1;
+ p++;
+ }
+
+ /* Step 2: shift input so the leading 1 bit is at bit 30 (s1.30 mantissa).
+ * Equivalently: m = input << (30 - p), where m is in [2^30, 2^31).
+ * The fractional part of m / 2^30 is in [0, 1), and that's what we look
+ * up in the table.
+ */
+ if (p >= 30)
+ m = (u32)input >> (p - 30);
+ else
+ m = (u32)input << (30 - p);
+
+ /* m is now in [2^30, 2^31). Subtract 2^30 to get the fractional part
+ * (m_frac in [0, 2^30)). Index into the 64-entry table is the top 6
+ * bits of m_frac; the lower 24 bits are the interpolation fraction.
+ */
+ m -= (1u << 30);
+ idx = (s32)(m >> 24); /* 6 bits */
+ frac = (s32)(m & ((1u << 24) - 1)); /* 24 bits */
+ lo = (s32)gFR_LOG2_MANT_TAB[idx];
+ hi = (s32)gFR_LOG2_MANT_TAB[idx + 1];
+ mant_log2 = lo + (s32)(((int64_t)(hi - lo) * frac) >> 24);
+
+ /* Step 3: assemble. integer_part = p - radix. */
+ integer_part = p - (s32)radix;
+ result = (integer_part << 16) + mant_log2;
+
+ /* Step 4: re-radix to output_radix. */
+ return FR_CHRDX(result, 16, output_radix);
+}
+
+s32 FR_ln(s32 input, u16 radix, u16 output_radix)
+{
+ s32 r = FR_log2(input, radix, output_radix);
+ return FR_MULK28(r, FR_krLOG2E_28);
+}
+
+s32 FR_log10(s32 input, u16 radix, u16 output_radix)
+{
+ s32 r = FR_log2(input, radix, output_radix);
+ return FR_MULK28(r, FR_krLOG2_10_28);
+}
+
+#ifndef FR_NO_PRINT
+/***************************************
+ * FR_printNumD - write a decimal integer with space padding.
+ *
+ * Equivalent to "%*d" in printf, modulo the return convention.
+ *
+ * f : per-character output function (e.g. putchar). Must not be NULL.
+ * n : signed integer to print.
+ * pad : minimum field width; spaces are prepended to reach this width.
+ *
+ * Returns the number of characters written on success, or -1 if `f` is NULL.
+ */
+int FR_printNumD(int (*f)(char), int n, int pad)
+{
+ unsigned int mag;
+ int written = 0, neg = 0;
+ int digits = 1;
+ unsigned int t;
+
+ if (!f)
+ return -1;
+
+ if (n < 0)
+ {
+ neg = 1;
+ mag = 0u - (unsigned int)n; /* unsigned negate: safe for INT_MIN even when long is 32-bit */
+ }
+ else
+ {
+ mag = (unsigned int)n;
+ }
+
+ /* Count decimal digits in mag (always at least 1 for n=0). */
+ t = mag;
+ while (t >= 10)
+ {
+ t /= 10;
+ digits++;
+ }
+
+ /* Pad with spaces. The total width includes the sign. */
+ {
+ int total = digits + (neg ? 1 : 0);
+ while (pad-- > total)
+ {
+ f(' ');
+ written++;
+ }
+ }
+
+ if (neg)
+ {
+ f('-');
+ written++;
+ }
+
+ /* Print digits MSB first by computing the largest power of 10 <= mag. */
+ {
+ unsigned int p = 1;
+ int i;
+ for (i = 1; i < digits; i++)
+ p *= 10;
+ while (p > 0)
+ {
+ f((char)('0' + (mag / p) % 10));
+ written++;
+ if (p == 1)
+ break;
+ p /= 10;
+ }
+ }
+
+ return written;
+}
+
+/***************************************
+ * FR_printNumF - write a fixed-point number as a decimal floating-point string.
+ *
+ * f : per-character output function. Must not be NULL.
+ * n : signed fixed-point value at the given radix.
+ * radix : number of fractional bits in `n`.
+ * pad : minimum field width (including sign and decimal point).
+ * prec : number of fractional digits to print.
+ *
+ * Returns the number of characters written on success, -1 if `f` is NULL.
+ *
+ * Rounding policy: truncates fractional digits beyond `prec` (no rounding).
+ */
+int FR_printNumF(int (*f)(char), s32 n, int radix, int pad, int prec)
+{
+ unsigned int mag_int;
+ u32 mag_frac;
+ u32 frac_mask;
+ int written = 0, neg = 0;
+ int int_digits = 1;
+ int total;
+ unsigned int t;
+
+ if (!f)
+ return -1;
+
+ frac_mask = (radix > 0) ? (((u32)1 << radix) - 1) : 0;
+
+ if (n < 0)
+ {
+ neg = 1;
+ /* Negate as unsigned to avoid INT_MIN overflow. */
+ u32 un = (u32)(-(int64_t)n);
+ mag_int = (unsigned int)(un >> radix);
+ mag_frac = un & frac_mask;
+ }
+ else
+ {
+ mag_int = (unsigned int)((u32)n >> radix);
+ mag_frac = (u32)n & frac_mask;
+ }
+
+ /* Count integer digits. */
+ t = mag_int;
+ while (t >= 10)
+ {
+ t /= 10;
+ int_digits++;
+ }
+
+ /* Total visible width = sign + int + (dot + prec digits if prec>0). */
+ total = int_digits + (neg ? 1 : 0) + ((prec > 0) ? (1 + prec) : 0);
+ while (pad-- > total)
+ {
+ f(' ');
+ written++;
+ }
+
+ if (neg)
+ {
+ f('-');
+ written++;
+ }
+
+ /* Print integer part. */
+ {
+ unsigned int p = 1;
+ int i;
+ for (i = 1; i < int_digits; i++)
+ p *= 10;
+ while (p > 0)
+ {
+ f((char)('0' + (mag_int / p) % 10));
+ written++;
+ if (p == 1)
+ break;
+ p /= 10;
+ }
+ }
+
+ /* Print fractional part. Extract one decimal digit at a time:
+ * frac' = frac * 10
+ * digit = frac' >> radix
+ * frac = frac' & frac_mask
+ */
+ if (prec > 0)
+ {
+ f('.');
+ written++;
+ while (prec-- > 0)
+ {
+ uint64_t scaled;
+ int digit;
+ scaled = (uint64_t)mag_frac * 10; /* stay 64-bit: exceeds u32 once radix >= 29 */
+ digit = (int)(scaled >> radix);
+ mag_frac = (u32)(scaled & frac_mask);
+ f((char)('0' + (digit % 10)));
+ written++;
+ }
+ }
+
+ return written;
+}
+
+/***************************************
+ * FR_printNumH - write an integer as hexadecimal.
+ *
+ * f : per-character output function. Must not be NULL.
+ * n : integer to print (interpreted as unsigned for the digits).
+ * showPrefix : if non-zero, prepend "0x".
+ *
+ * Returns the number of characters written on success, -1 if f is NULL.
+ */
+int FR_printNumH(int (*f)(char), int n, int showPrefix)
+{
+ unsigned int u = (unsigned int)n;
+ int written = 0;
+ int x = (int)((sizeof(int) << 1) - 1);
+ int d;
+
+ if (!f)
+ return -1;
+
+ if (showPrefix)
+ {
+ f('0');
+ f('x');
+ written += 2;
+ }
+
+ do
+ {
+ d = (int)((u >> (x << 2)) & 0xf);
+ d = (d > 9) ? (d - 0xa + 'a') : (d + '0');
+ f((char)d);
+ written++;
+ } while (x--);
+
+ return written;
+}
+
+/*=======================================================
+ * FR_numstr — parse a decimal string into a fixed-point value.
+ *
+ * This is the runtime inverse of FR_printNumF: given a string like
+ * "12.34" or "-0.05" and a radix (number of fractional bits), it
+ * returns the s32 fixed-point representation.
+ *
+ * Features:
+ * - Leading whitespace is skipped.
+ * - Optional sign ('+' or '-').
+ * - Up to 9 fractional digits are used (s32 range).
+ * - No malloc, no strtod, no libm.
+ *
+ * Returns 0 for NULL or empty input.
+ */
+s32 FR_numstr(const char *s, u16 radix)
+{
+ static const s32 pow10[10] = {
+ 1L, 10L, 100L, 1000L, 10000L,
+ 100000L, 1000000L, 10000000L, 100000000L, 1000000000L
+ };
+ s32 int_part = 0, frac_part = 0;
+ int frac_digits = 0, neg = 0;
+ s32 result;
+
+ if (!s || !*s) return 0;
+
+ while (*s == ' ' || *s == '\t') s++; /* skip whitespace */
+ if (*s == '-') { neg = 1; s++; } /* sign */
+ else if (*s == '+') { s++; }
+
+ while (*s >= '0' && *s <= '9') /* integer part */
+ { int_part = int_part * 10 + (*s - '0'); s++; }
+
+ if (*s == '.') { /* fractional part */
+ s++;
+ while (*s >= '0' && *s <= '9') {
+ if (frac_digits < 9)
+ { frac_part = frac_part * 10 + (*s - '0'); frac_digits++; }
+ s++;
+ }
+ }
+
+ result = int_part << radix;
+ if (frac_digits > 0)
+ result += (s32)(((int64_t)frac_part << radix) / pow10[frac_digits]);
+
+ return neg ? -result : result;
+}
+#endif /* FR_NO_PRINT */
+
+/*=======================================================
+ * Square root and hypot
+ *
+ * fr_isqrt64 is a private helper implementing the digit-by-digit
+ * ("shift-and-subtract") integer square root. The core loop computes
+ * floor(sqrt(n)), then a final remainder check rounds to nearest.
+ * Uses no division. At most 32 iterations.
+ */
+static u32 fr_isqrt64(uint64_t n)
+{
+ uint64_t root = 0;
+ uint64_t bit = (uint64_t)1 << 62;
+ while (bit > n) bit >>= 2;
+ while (bit != 0)
+ {
+ uint64_t trial = root + bit;
+ if (n >= trial)
+ {
+ n -= trial;
+ root = (root >> 1) + bit;
+ }
+ else
+ {
+ root >>= 1;
+ }
+ bit >>= 2;
+ }
+ /* round to nearest: if remainder > root, (root+1)^2 is closer */
+ if (n > root)
+ root++;
+ return (u32)root;
+}
+
+/*=======================================================
+ * FR_sqrt - fixed-radix square root.
+ *
+ * input : value at radix `radix`. Must be >= 0.
+ * radix : fractional bits of input AND result.
+ * return : sqrt(input) at radix `radix`, or FR_DOMAIN_ERROR if input < 0.
+ *
+ * Math: sqrt(input_fp / 2^r) at radix r is
+ * result_fp = sqrt(input_fp / 2^r) * 2^r = sqrt(input_fp * 2^r)
+ * so we compute isqrt(input_fp << radix) on a 64-bit accumulator. This
+ * works for any input that fits in s32 and any radix in [0, 30].
+ *
+ * Precision: round-to-nearest sqrt. Worst-case absolute error is
+ * <= 0.5 LSB at the requested radix.
+ * Always non-negative for non-negative input. Result is monotone in
+ * input.
+ *
+ * Saturation: input < 0 returns FR_DOMAIN_ERROR (= INT32_MIN). Caller
+ * can test `result == FR_DOMAIN_ERROR` to detect domain errors.
+ *
+ * Side effects: none. Pure function.
+ */
+s32 FR_sqrt(s32 input, u16 radix)
+{
+ uint64_t n;
+
+ if (input < 0)
+ return FR_DOMAIN_ERROR;
+ if (input == 0)
+ return 0;
+
+ n = (uint64_t)(u32)input << radix;
+ return (s32)fr_isqrt64(n);
+}
+
+/*=======================================================
+ * FR_hypot - sqrt(x*x + y*y) without intermediate overflow.
+ *
+ * x, y : values at radix `radix`
+ * radix : fractional bits of inputs AND result
+ * return : sqrt(x*x + y*y) at radix `radix`.
+ *
+ * Math: x*x + y*y is naturally at radix 2*radix; isqrt of a 2r-radix
+ * value yields an r-radix result, so no extra shifting is needed. The
+ * u64 accumulator can hold (INT32_MAX^2)*2 = ~2^63, so (x*x + y*y) never
+ * overflows for any s32 inputs.
+ *
+ * Precision: round-to-nearest, error <= 0.5 LSB at the requested radix.
+ * Saturation: a magnitude above INT32_MAX is clamped to INT32_MAX.
+ *
+ * Side effects: none. Pure function.
+ */
+s32 FR_hypot(s32 x, s32 y, u16 radix)
+{
+ uint64_t sum = (uint64_t)((int64_t)x * x) + (uint64_t)((int64_t)y * y);
+ u32 mag = fr_isqrt64(sum); /* can reach ~2^31.5, beyond the s32 range */
+ (void)radix; /* the 2*radix scale of the sum cancels with isqrt's halving */
+ return (mag > (u32)0x7FFFFFFF) ? (s32)0x7FFFFFFF : (s32)mag; /* saturate */
+}
+
+/*=======================================================
+ * FR_hypot_fast8 — 8-segment piecewise-linear magnitude approximation.
+ *
+ * Shift-only, no multiply, no 64-bit. Based on the piecewise-linear
+ * method described in US Patent 6,567,777 B1 (Chatterjee, expired).
+ * Peak error: ~0.10%.
+ */
+s32 FR_hypot_fast8(s32 x, s32 y)
+{
+ s32 hi, lo;
+
+ /* absolute values (clamp INT32_MIN to INT32_MAX to avoid UB) */
+ if (x < 0) x = (x == (s32)0x80000000) ? 0x7FFFFFFF : -x;
+ if (y < 0) y = (y == (s32)0x80000000) ? 0x7FFFFFFF : -y;
+
+ /* hi = max(|x|,|y|), lo = min(|x|,|y|) */
+ if (x > y) { hi = x; lo = y; }
+ else { hi = y; lo = x; }
+
+ if (hi == 0) return 0;
+
+ /* 8 piecewise-linear segments: dist ≈ a*hi + b*lo.
+ * Boundaries at β = 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875. */
+ if ((hi >> 1) < lo) {
+ /* β in (0.5, 1.0] */
+ if (lo > hi - (hi >> 2)) {
+ /* β in (0.75, 1.0] */
+ if (lo > hi - (hi >> 3)) /* β > 0.875 */
+ /* a≈0.7305, b≈0.6836 */
+ return hi - (hi >> 2) - (hi >> 6) - (hi >> 8)
+ + lo - (lo >> 2) - (lo >> 4) - (lo >> 8);
+ else /* β in (0.75, 0.875] */
+ /* a≈0.7803, b≈0.6262 */
+ return hi - (hi >> 2) + (hi >> 5) - (hi >> 10)
+ + (lo >> 1) + (lo >> 3) + (lo >> 10) + (lo >> 12);
+ } else {
+ /* β in (0.5, 0.75] */
+ if (lo > hi - (hi >> 1) + (hi >> 3)) /* β > 0.625 */
+ /* a≈0.8281, b≈0.5630 */
+ return hi - (hi >> 2) + (hi >> 4) + (hi >> 6)
+ + (lo >> 1) + (lo >> 4) + (lo >> 11);
+ else /* β in (0.5, 0.625] */
+ /* a≈0.8728, b≈0.4893 */
+ return hi - (hi >> 3) - (hi >> 9) - (hi >> 12)
+ + (lo >> 1) - (lo >> 6) + (lo >> 8) + (lo >> 10);
+ }
+ } else {
+ /* β in [0, 0.5] */
+ if ((hi >> 2) < lo) {
+ /* β in (0.25, 0.5] */
+ if ((hi >> 1) - (hi >> 3) < lo) /* β > 0.375 */
+ /* a≈0.9180, b≈0.3984 */
+ return hi - (hi >> 4) - (hi >> 6) - (hi >> 8)
+ + (lo >> 1) - (lo >> 3) + (lo >> 5) - (lo >> 7);
+ else /* β in (0.25, 0.375] */
+ /* a≈0.9551, b≈0.2988 */
+ return hi - (hi >> 4) + (hi >> 6) + (hi >> 9)
+ + (lo >> 2) + (lo >> 4) - (lo >> 6) + (lo >> 9);
+ } else {
+ /* β in [0, 0.25] */
+ if ((hi >> 3) < lo) /* β in (0.125, 0.25] */
+ /* a≈0.9839, b≈0.1838 */
+ return hi - (hi >> 6) - (hi >> 11)
+ + (lo >> 2) - (lo >> 4) - (lo >> 8) + (lo >> 12);
+ else /* β in [0, 0.125] */
+ /* a≈0.9990, b≈0.0620 */
+ return hi - (hi >> 10)
+ + (lo >> 4) - (lo >> 11);
+ }
+ }
+}
+
+#ifndef FR_NO_WAVES
+/*=======================================================
+ * Wave generators — synth-style fixed-shape waveforms.
+ *
+ * All wave functions take a u16 BAM phase in [0, 65535] (a full cycle)
+ * and return s16 in s0.15 format, clamped to [-32767, +32767] to match
+ * the trig amplitude convention used by fr_cos_bam / fr_sin_bam.
+ *
+ * Use FR_HZ2BAM_INC(hz, sample_rate) to compute a phase increment for
+ * a given output frequency, then accumulate it (mod 2^16) per sample.
+ *
+ * Side effects: pure functions (except fr_wave_noise which advances a
+ * caller-provided LFSR state pointer).
+ */
+
+/* fr_wave_sqr - 50%-duty square wave.
+ * phase < pi (BAM<0x8000) → +full; phase >= pi → -full.
+ */
+s16 fr_wave_sqr(u16 phase)
+{
+ return (phase < 0x8000) ? (s16)32767 : (s16)-32767;
+}
+
+/* fr_wave_pwm - variable-duty pulse.
+ * `duty` is the BAM threshold: phase < duty → high, else low.
+ * duty = 0 → always low
+ * duty = 0x8000 → 50% duty (same as fr_wave_sqr)
+ * duty = 0xffff → high almost everywhere (one BAM step low)
+ */
+s16 fr_wave_pwm(u16 phase, u16 duty)
+{
+ return (phase < duty) ? (s16)32767 : (s16)-32767;
+}
+
+/* fr_wave_saw - rising sawtooth.
+ * Linear ramp from -32767 (just after phase=0) to +32767 (at phase=0xffff),
+ * passing through 0 at phase=0x8000. The single boundary case phase=0
+ * (which would naturally produce -32768) is clamped to -32767 to keep the
+ * amplitude symmetric.
+ */
+s16 fr_wave_saw(u16 phase)
+{
+ s32 v = (s32)phase - (s32)0x8000;
+ if (v < -32767) v = -32767;
+ return (s16)v;
+}
+
+/* fr_wave_tri - symmetric triangle.
+ * Four linear segments:
+ * Q1 [0, 0x4000) : rising 0 → +peak
+ * Q2 [0x4000, 0x8000): falling +peak → 0
+ * Q3 [0x8000, 0xc000): falling 0 → -peak
+ * Q4 [0xc000, 0x10000): rising -peak → 0
+ * Peaks are clamped to +/-32767 (the natural unclamped formula gives
+ * +/-32768 at the exact peak BAM).
+ */
+s16 fr_wave_tri(u16 phase)
+{
+ s32 t;
+ if (phase < 0x8000)
+ {
+ /* First half: 0 -> +peak -> 0 */
+ if (phase < 0x4000)
+ t = (s32)phase << 1; /* 0 .. 0x7ffe */
+ else
+ t = (s32)(0x8000 - phase) << 1; /* 0x8000 .. 2 */
+ if (t > 32767) t = 32767;
+ return (s16)t;
+ }
+ else
+ {
+ /* Second half: 0 -> -peak -> 0 */
+ if (phase < 0xc000)
+ t = (s32)(phase - 0x8000) << 1; /* 0 .. 0x7ffe */
+ else
+ t = (s32)(0x10000 - phase) << 1;/* 0x8000 .. 2 */
+ if (t > 32767) t = 32767;
+ return (s16)-t;
+ }
+}
+
+/* fr_wave_tri_morph - variable-symmetry triangle.
+ *
+ * phase : u16 BAM
+ * break_point : u16 BAM where the wave reaches its positive peak.
+ *
+ * Going from 0 to +peak in [0, break_point), then from +peak back to 0
+ * in [break_point, 0xffff]. The result is a triangle whose rising and
+ * falling slopes can differ.
+ *
+ * break_point = 0x8000 → symmetric triangle
+ * break_point = 0xffff → rising sawtooth (instant fall)
+ * break_point = 0x0001 → falling sawtooth (instant rise)
+ * break_point = 0 → degenerate; treated as 1 to avoid div-by-zero
+ *
+ * Note that this version returns values in [0, 32767] only (not bipolar).
+ * Caller can subtract 16384 and double if a bipolar version is desired.
+ *
+ * Costs: one 32-bit divide per sample. On Cortex-M3+ this is ~10-20
+ * cycles. On 8051 / MSP430 this is much slower; pre-compute slopes if
+ * those targets matter to you.
+ */
+s16 fr_wave_tri_morph(u16 phase, u16 break_point)
+{
+ u32 t;
+ if (break_point == 0)
+ break_point = 1;
+ if (phase < break_point)
+ {
+ /* rising: 0 at phase=0, 32767 at phase=break_point */
+ t = (u32)(((u32)phase * 32767UL) / (u32)break_point);
+ }
+ else
+ {
+ /* falling: 32767 at phase=break_point, 0 at phase=0xffff */
+ u32 span = (u32)0xffff - (u32)break_point;
+ if (span == 0)
+ return 32767;
+ t = (u32)(((u32)((u32)0xffff - (u32)phase) * 32767UL) / span);
+ }
+ if (t > 32767) t = 32767;
+ return (s16)t;
+}
+
+/* fr_wave_noise - LFSR-based pseudorandom noise.
+ *
+ * state : pointer to a u32 the caller maintains. Initial value must
+ * be non-zero (zero is a fixed point of the LFSR). A common
+ * seed is 0xACE1u or any other non-zero constant.
+ *
+ * Returns the next s16 sample in s0.15 (full ±32767 range, white-ish).
+ * Implementation: 32-bit Galois LFSR with the standard maximal-period
+ * tap polynomial 0xD0000001 (period 2^32 - 1 samples).
+ *
+ * Quality: this is "fast white noise" suitable for synth use. It is NOT
+ * cryptographically secure. For better statistical properties (FFT
+ * flatness etc.) layer a longer LFSR or use a separate PRNG.
+ */
+s16 fr_wave_noise(u32 *state)
+{
+ u32 lsb;
+ if (!state)
+ return 0;
+ lsb = *state & 1u;
+ *state >>= 1;
+ if (lsb)
+ *state ^= 0xD0000001u;
+ /* Take the top 16 bits and re-bias to s16 range, clamp to ±32767. */
+ {
+ s32 v = (s32)((*state >> 16) & 0xffffu) - 32768;
+ if (v < -32767) v = -32767;
+ return (s16)v;
+ }
+}
+
+/*=======================================================
+ * ADSR envelope generator
+ *
+ * Linear-segment Attack-Decay-Sustain-Release envelope. State is held
+ * in caller-allocated fr_adsr_t struct (no global state, no malloc).
+ *
+ * Lifecycle:
+ * 1. Caller allocates an fr_adsr_t (stack or static).
+ * 2. fr_adsr_init() once per patch with attack/decay/release durations
+ * in samples and a sustain level in s0.15.
+ * 3. fr_adsr_trigger() on note-on. Output rises 0 -> peak over `atk`
+ * samples, falls peak -> sustain over `dec` samples, then holds.
+ * 4. fr_adsr_release() on note-off. Output falls current -> 0 over a
+ * time controlled by the release rate (rate, not duration: the
+ * time depends on where in the envelope we are).
+ * 5. fr_adsr_step() once per audio sample to read the current value.
+ *
+ * Internal precision: levels are stored as s32 in s1.30 format so even
+ * very long envelopes (e.g. 48000-sample attack at 48 kHz = 1 second)
+ * have a non-zero per-sample increment. Output is converted to s0.15.
+ *
+ * Saturation: the envelope state machine is self-clamping; level cannot
+ * escape [0, 1<<30]. Output is in [0, 32767].
+ */
+
+#define FR_ADSR_PEAK_S130 ((s32)1 << 30)
+
+void fr_adsr_init(fr_adsr_t *env,
+ u32 attack_samples,
+ u32 decay_samples,
+ s16 sustain_level_s015,
+ u32 release_samples)
+{
+ if (!env)
+ return;
+ env->state = FR_ADSR_IDLE;
+ env->level = 0;
+
+ if (sustain_level_s015 < 0) sustain_level_s015 = 0; /* upper bound is s16's max */
+ env->sustain = (s32)sustain_level_s015 << 15; /* s0.15 -> s1.30 */
+
+ env->attack_inc = (attack_samples > 0)
+ ? (s32)(FR_ADSR_PEAK_S130 / attack_samples)
+ : FR_ADSR_PEAK_S130;
+ env->decay_dec = (decay_samples > 0)
+ ? (s32)((u32)(FR_ADSR_PEAK_S130 - env->sustain) / decay_samples)
+ : (FR_ADSR_PEAK_S130 - env->sustain);
+ env->release_dec = (release_samples > 0)
+ ? (s32)(FR_ADSR_PEAK_S130 / release_samples)
+ : FR_ADSR_PEAK_S130;
+ /* Floor each step at 1 so a very long segment cannot stall the envelope. */
+ if (env->attack_inc < 1) env->attack_inc = 1;
+ if (env->decay_dec < 1) env->decay_dec = 1;
+ if (env->release_dec < 1) env->release_dec = 1;
+}
+
+void fr_adsr_trigger(fr_adsr_t *env)
+{
+ if (!env)
+ return;
+ env->state = FR_ADSR_ATTACK;
+ env->level = 0;
+}
+
+void fr_adsr_release(fr_adsr_t *env)
+{
+ if (!env)
+ return;
+ env->state = FR_ADSR_RELEASE;
+}
+
+s16 fr_adsr_step(fr_adsr_t *env)
+{
+ if (!env)
+ return 0;
+ switch (env->state)
+ {
+ case FR_ADSR_ATTACK:
+ env->level += env->attack_inc;
+ if (env->level >= FR_ADSR_PEAK_S130)
+ {
+ env->level = FR_ADSR_PEAK_S130;
+ env->state = FR_ADSR_DECAY;
+ }
+ break;
+ case FR_ADSR_DECAY:
+ env->level -= env->decay_dec;
+ if (env->level <= env->sustain)
+ {
+ env->level = env->sustain;
+ env->state = FR_ADSR_SUSTAIN;
+ }
+ break;
+ case FR_ADSR_SUSTAIN:
+ env->level = env->sustain;
+ break;
+ case FR_ADSR_RELEASE:
+ env->level -= env->release_dec;
+ if (env->level <= 0)
+ {
+ env->level = 0;
+ env->state = FR_ADSR_IDLE;
+ }
+ break;
+ case FR_ADSR_IDLE:
+ default:
+ env->level = 0;
+ break;
+ }
+ /* s1.30 -> s0.15: shift right 15. Clamp for safety. */
+ {
+ s32 out = env->level >> 15;
+ if (out < 0) out = 0;
+ if (out > 32767) out = 32767;
+ return (s16)out;
+ }
+}
+#endif /* FR_NO_WAVES */
diff --git a/docs/README.md b/docs/README.md
index dad53da..c7a900a 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -45,30 +45,30 @@ radix — Q16.16 is just the reference point for the table. See the
[TDD report](../build/test_tdd_report.md) for sweeps at radixes 8, 12,
16, and 24.
-
-| Function | Max err (%)*| Avg err (%) | Note |
-|---|---:|---:|---|
-| sin/cos (BAM) | 0.4578 | 0.0076 | fr_sin_bam/fr_cos_bam direct; 129-entry table |
-| sin/cos (deg) | 0.4578 | 0.0076 | FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM |
-| sin/cos (rad) | 0.6104 | 0.0085 | fr_sin/fr_cos via fr_rad_to_bam ±2π r16 |
-| tan (BAM) | 0.5823 | 0.0008 | fr_tan_bam 65536-pt full; ±maxint at poles |
-| tan (deg) | 0.5311 | 0.0008 | FR_Tan ±360° s15.16 full; sat at poles |
-| tan (rad) | 13.4069 | 0.0029 | fr_tan ±2π r16 full; sat at poles |
-| asin / acos | 0.8743 | 0.0301 | 65536-pt; sqrt approx near boundary |
-| atan2 | 0.5100 | 0.0237 | 65536x5 radii; asin/acos+hypot_fast8 |
-| atan | 0.3390 | 0.0154 | 20001-pt full sweep [-10,10]; via FR_atan2 |
-| sqrt | 0.0239 | 0.0000 | Round-to-nearest |
-| log2 | 0.0286 | 0.0029 | 65-entry mantissa table |
-| pow2 | 0.0019 | 0.0003 | 65-entry fraction table |
-| ln, log10 | 0.0004 | 0.0000 | Via FR_MULK28 from log2 |
-| exp | 0.0003 | 0.0000 | FR_MULK28 + FR_pow2 |
-| exp_fast | 0.0009 | 0.0001 | Shift-only scaling |
-| pow10 | 0.0007 | 0.0000 | FR_MULK28 + FR_pow2 |
-| pow10_fast | 0.0028 | 0.0002 | Shift-only scaling |
-| hypot (exact) | 0.0000 | 0.0000 | 64-bit intermediate |
-| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply |
-
-*Relative error; reference clamped to 1% of full-scale output.
+
+| Function | Max err (%)*| Avg err (%) | Note |
+|---|---:|---:|---|
+| sin/cos (BAM) | 0.1526 | 0.0030 | fr_sin_bam/fr_cos_bam direct; 129-entry table |
+| sin/cos (deg) | 0.1526 | 0.0029 | FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM |
+| sin/cos (rad) | 0.1828 | 0.0033 | fr_sin/fr_cos via fr_rad_to_bam ±2π r16 |
+| tan (BAM) | 0.5823 | 0.0008 | fr_tan_bam 65536-pt full; ±maxint at poles |
+| tan (deg) | 0.5311 | 0.0008 | fr_tan_deg ±360° s15.16 full; sat at poles |
+| tan (rad) | 0.0386 | 0.0001 | fr_tan ±2π r16; r24 pole bypass |
+| asin / acos | 0.7771 | 0.0280 | 65536-pt; sqrt approx near boundary |
+| atan2 | 0.2564 | 0.0237 | 65536x5 radii; asin/acos+hypot_fast8 |
+| atan | 0.2425 | 0.0155 | 20001-pt full sweep [-10,10]; via FR_atan2 |
+| sqrt | 0.0000 | 0.0000 | Round-to-nearest |
+| log2 | 0.0116 | 0.0016 | 65-entry mantissa table |
+| pow2 | 0.0018 | 0.0004 | 65-entry fraction table |
+| ln, log10 | 0.0004 | 0.0000 | Via FR_MULK28 from log2 |
+| exp | 0.0003 | 0.0000 | FR_MULK28 + FR_pow2 |
+| exp_fast | 0.0009 | 0.0001 | Shift-only scaling |
+| pow10 | 0.0005 | 0.0000 | FR_MULK28 + FR_pow2 |
+| pow10_fast | 0.0022 | 0.0002 | Shift-only scaling |
+| hypot (exact) | 0.0000 | 0.0000 | 64-bit intermediate |
+| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply |
+
+*Relative error; reference clamped to 1% of full-scale output.
## What's in the box
@@ -221,14 +221,14 @@ understand *how* the radix notation works first.
| Multiply-free option | No | No | Yes (e.g. `FR_EXP_FAST`, `FR_hypot_fast8`) |
| Wave generators | No | No | 6 shapes + ADSR |
| Dependencies | None | ARM only | None |
-| Code size (Cortex-M0, -Os) | 2.4 KB | ~40 KB+ | 4.2 KB |
+| Code size (Cortex-M0, -Os) | 2.4 KB | ~40 KB+ | 3.4 KB lean / 5.7 KB full |
Sizes measured with `arm-none-eabi-gcc -mcpu=cortex-m0 -mthumb -Os`.
libfixmath covers trig/sqrt/exp in Q16.16 only; FR_Math includes
log/ln/log10, wave generators, ADSR, print helpers, and variable radix.
CMSIS-DSP estimate is for the math function subset only.
-See [`docker/build_sizes.sh`](../docker/build_sizes.sh) for the build
-script.
+See [`scripts/crossbuild-docker.sh`](../scripts/crossbuild-docker.sh) for
+the build script.
## History
diff --git a/docs/building.md b/docs/building.md
index 4a17c1a..fea0142 100644
--- a/docs/building.md
+++ b/docs/building.md
@@ -160,39 +160,51 @@ you do *not* need `libm`.
### Code size (.text section, compiled with `-Os`)
-Sizes are for `FR_math.c` compiled with `-Os -ffreestanding`.
-Core = compiled with `-DFR_CORE_ONLY` (math only, no print, no waves).
+Sizes are for `FR_math.c` compiled with `-Os`.
+Lean = `-DFR_LEAN -DFR_NO_PRINT` (radian trig, inv trig, log/exp, sqrt).
+Core = `-DFR_CORE_ONLY` (+ degree trig, BAM tan, log10, hypot).
+Full = all features (+ print, waves, ADSR).
With `-ffunction-sections` and linker `--gc-sections`, only the
functions your application references are linked, so real flash
usage will be smaller.
-| Target | Core | Full |
-|--------|-----:|-----:|
-| RP2040 (Cortex-M0+) | 2.6 KB | 4.2 KB |
-| STM32 (Cortex-M4) | 2.6 KB | 4.2 KB |
-| RISC-V 32 (rv32imac) | 3.0 KB | 4.7 KB |
-| ESP32 (Xtensa) | 3.5 KB | 5.2 KB |
-| 68k | 3.5 KB | 5.3 KB |
-| x86-64 (GCC) | 3.5 KB | 5.7 KB |
-| x86-32 | 4.5 KB | 6.8 KB |
-| MSP430 (16-bit) | 5.9 KB | 8.9 KB |
-| 68HC11 | 10.8 KB | 16.0 KB |
-| AVR (ATmega328P) | 7.0 KB | 10.6 KB |
+| Target | Lean | Core | Full |
+|--------|-----:|-----:|-----:|
+| Cortex-M4 (STM32) | 3.3 KB | 4.4 KB | 5.5 KB |
+| Cortex-M0 (RP2040) | 3.4 KB | 4.5 KB | 5.7 KB |
+| ARM Thumb | 3.4 KB | 4.7 KB | 5.9 KB |
+| RISC-V rv64 | 4.0 KB | 5.5 KB | 6.8 KB |
+| RISC-V rv32 | 4.1 KB | 5.5 KB | 6.8 KB |
+| Xtensa LX106 (ESP8266) | 4.2 KB | 5.8 KB | 7.3 KB |
+| ARM32 | 4.3 KB | 5.8 KB | 7.7 KB |
+| 68k | 4.4 KB | 6.2 KB | 7.8 KB |
+| x86-64 (GCC) | 4.6 KB | 6.1 KB | 8.0 KB |
+| AArch64 (ARM64) | 4.8 KB | 6.6 KB | 8.7 KB |
+| x86-32 | 5.3 KB | 7.2 KB | 9.2 KB |
+| PowerPC | 5.8 KB | 8.0 KB | 10.4 KB |
+| MSP430 (16-bit) | 7.8 KB | 10.7 KB | 12.8 KB |
+| AVR (ATmega328P) | 9.2 KB | 12.8 KB | 15.4 KB |
+| 68HC11 | 13.3 KB | 18.4 KB | 22.6 KB |
### Lean build options
-Three compile-time `#define` guards let you strip optional subsystems
+Compile-time `#define` guards let you strip optional subsystems
for ROM-constrained targets. Define them before including `FR_math.h`
(or pass `-D` on the compiler command line):
| Define | What it removes | Typical savings |
|---|---|---|
-| `FR_CORE_ONLY` | Everything below (print + waves) | ~1.9 KB |
+| `FR_LEAN` | Degree trig, BAM tan, angle converters, `FR_log10`, `FR_hypot`, waves + ADSR | ~3.7 KB |
+| `FR_CORE_ONLY` | Print + waves (shorthand for both below) | ~1.9 KB |
| `FR_NO_PRINT` | `FR_printNumF`, `FR_printNumD`, `FR_printNumH`, `FR_numstr` | ~1.3 KB |
| `FR_NO_WAVES` | `fr_wave_*` (6 shapes), `fr_adsr_*` (ADSR envelope), `FR_HZ2BAM_INC` | ~0.6 KB |
+`FR_LEAN` keeps only radian trig (sin, cos, tan), inverse trig, sqrt,
+log2, ln, exp, pow2, and arithmetic — comparable to libfixmath's API at
+4.7 KB text. `FR_LEAN` implies `FR_NO_WAVES`.
+
`FR_CORE_ONLY` is a convenience shorthand that defines both
`FR_NO_PRINT` and `FR_NO_WAVES` in one step.
diff --git a/docs/examples.md b/docs/examples.md
index d07a477..f3e0bed 100644
--- a/docs/examples.md
+++ b/docs/examples.md
@@ -1,7 +1,7 @@
# Examples
Short, runnable snippets for the most common FR_Math tasks. Each
-example compiles cleanly against the v2.0.0 library with:
+example compiles cleanly against the v2.0.8 library with:
```bash
cc -Isrc example.c src/FR_math.c -o example
diff --git a/docs/getting-started.md b/docs/getting-started.md
index 9eac417..48028ce 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -13,8 +13,8 @@ manager integration and no install step. Either:
- Copy `src/FR_math.c`, `src/FR_math.h`,
`src/FR_defs.h` (and optionally
- `src/FR_math_2D.cpp`, `src/FR_math_2D.h`,
- and `src/FR_trig_table.h`) into the target project, **or**
+ `src/FR_math_2D.cpp`, `src/FR_math_2D.h`)
+ into the target project, **or**
- Add FR_Math as a git submodule and point the build system at
`src/`.
diff --git a/idf_component.yml b/idf_component.yml
index 6a0d030..8097972 100644
--- a/idf_component.yml
+++ b/idf_component.yml
@@ -1,4 +1,4 @@
-version: "2.0.7"
+version: "2.0.8"
description: "Compact fixed-point math library for embedded systems. Integer-only with caller-selectable radix. Trig, log/exp, sqrt, hypot, wave generators, ADSR, and 2D transforms. Zero dependencies."
url: "https://github.com/deftio/fr_math"
repository: "https://github.com/deftio/fr_math.git"
diff --git a/library.json b/library.json
index 495a89f..17f9649 100644
--- a/library.json
+++ b/library.json
@@ -1,6 +1,6 @@
{
"name": "FR_Math",
- "version": "2.0.7",
+ "version": "2.0.8",
"description": "Compact fixed-point math library for embedded systems. Integer-only with caller-selectable radix. Trig, log/exp, sqrt, hypot, wave generators, ADSR, and 2D transforms in 4KB of flash. Zero dependencies.",
"keywords": [
"fixed-point",
diff --git a/library.properties b/library.properties
index cd2d953..47dde32 100644
--- a/library.properties
+++ b/library.properties
@@ -1,5 +1,5 @@
name=FR_Math
-version=2.0.7
+version=2.0.8
author=M. A. Chatterjee
maintainer=M. A. Chatterjee
sentence=Compact fixed-point math library for embedded systems. 4KB flash, zero dependencies, any radix.
diff --git a/llms.txt b/llms.txt
index 7c0ce3e..4f13ef1 100644
--- a/llms.txt
+++ b/llms.txt
@@ -9,7 +9,7 @@ or libraries. Pure C99, zero dependencies beyond ``.
- Repository: https://github.com/deftio/fr_math
- Documentation: https://deftio.github.io/fr_math/
- License: BSD-2-Clause
-- Version: 2.0.7
+- Version: 2.0.8
## Key concept: radix parameter
@@ -25,9 +25,8 @@ Common radix choices:
## Source files
- `src/FR_math.h` — all public declarations, macros, constants
-- `src/FR_math.c` — all function implementations (~42KB)
+- `src/FR_math.c` — all function implementations (trig tables inlined, ~45KB)
- `src/FR_defs.h` — type aliases: s8, s16, s32, u8, u16, u32
-- `src/FR_trig_table.h` — precomputed cosine quadrant table (129 entries) + tangent octant table (65 entries)
- `src/FR_math_2D.h` / `src/FR_math_2D.cpp` — optional 2D transform class (C++)
## Types
diff --git a/makefile b/makefile
index 83a4e64..c3f0c74 100644
--- a/makefile
+++ b/makefile
@@ -54,7 +54,8 @@ help:
@echo " coverage Generate coverage report (gcov)"
@echo " coverage-basic Basic coverage info without lcov"
@echo " coverage-html HTML coverage report (requires lcov)"
- @echo " size-report Multi-architecture size report"
+ @echo " size-report Multi-architecture size report (Docker)"
+ @echo " size-update Size report + patch doc files"
@echo " size-simple Size report for current platform"
@echo ""
@echo "Tools:"
@@ -196,10 +197,15 @@ coverage-html: clean dirs
@echo "HTML report: $(COV_DIR)/html/index.html"
@genhtml $(COV_DIR)/coverage.info --output-directory $(COV_DIR)/html
-# Size report - multi-architecture
+# Size report - multi-architecture (Docker cross-compilation)
.PHONY: size-report
size-report: dirs
- @scripts/size_report.sh
+ @scripts/crossbuild_sizes.sh
+
+# Size report + patch doc files
+.PHONY: size-update
+size-update: dirs
+ @scripts/crossbuild_sizes.sh --update
# Simple size report for current platform
.PHONY: size-simple
@@ -215,6 +221,33 @@ size-simple: lib
ls -lh $(BUILD_DIR)/*.o; \
fi
+# Lean build: only functions with libfixmath equivalents (radian trig,
+# inverse trig, sqrt, log2, ln, exp, mul/div — no degree trig, no BAM
+# tan, no waves, no hypot exact, no log10).
+.PHONY: size-lean
+size-lean: dirs
+ @echo "=== LEAN Build (FR_LEAN — libfixmath-equivalent API only) ==="
+ @$(CC) -I$(SRC_DIR) $(LIB_WARN) -DFR_LEAN -DFR_NO_PRINT -Os -c $(SRC_DIR)/FR_math.c -o $(BUILD_DIR)/FR_math_lean.o
+ @size $(BUILD_DIR)/FR_math_lean.o
+ @echo ""
+
+# Full build: everything (default — all trig, waves, ADSR, print, etc.)
+.PHONY: size-full
+size-full: dirs
+ @echo "=== FULL Build (all features) ==="
+ @$(CC) -I$(SRC_DIR) $(LIB_WARN) -Os -c $(SRC_DIR)/FR_math.c -o $(BUILD_DIR)/FR_math_full.o
+ @size $(BUILD_DIR)/FR_math_full.o
+ @echo ""
+
+# Side-by-side lean vs full size comparison
+.PHONY: size-compare
+size-compare: size-lean size-full
+ @echo "=== Lean vs Full Comparison ==="
+ @LEAN=$$(size $(BUILD_DIR)/FR_math_lean.o | tail -1 | awk '{print $$1}'); \
+ FULL=$$(size $(BUILD_DIR)/FR_math_full.o | tail -1 | awk '{print $$1}'); \
+ echo " Lean text: $${LEAN} bytes"; \
+ echo " Full text: $${FULL} bytes"
+
# Tools
TOOLS_DIR = tools
@@ -232,7 +265,7 @@ $(BUILD_DIR)/trig_neighborhood: $(TOOLS_DIR)/trig_neighborhood.cpp $(SRC_DIR)/FR
.PHONY: clean
clean:
rm -rf $(BUILD_DIR) $(COV_DIR)
- rm -f *.o *.gcda *.gcno *.exe *.info
+ rm -f *.o *.gcda *.gcno *.gcov *.exe *.info
.PHONY: cleanall
cleanall: clean
@@ -250,7 +283,7 @@ coverage-basic: clean dirs
@echo ""
@echo "=== Basic Coverage Info ==="
@if command -v gcov >/dev/null 2>&1; then \
- gcov $(SRC_DIR)/FR_math.c -o $(BUILD_DIR) | grep -E "File|Lines executed"; \
+ cd $(BUILD_DIR) && gcov FR_math.o | grep -E "File|Lines executed"; \
echo ""; \
echo "For detailed coverage report, install lcov and run: make coverage"; \
else \
diff --git a/pages/assets/site.js b/pages/assets/site.js
index a686d8e..d4d0ecc 100644
--- a/pages/assets/site.js
+++ b/pages/assets/site.js
@@ -16,7 +16,7 @@
════════════════════════════════════════════════════════════════════ */
(function () {
- var FR_VERSION = 'v2.0.7';
+ var FR_VERSION = 'v2.0.8';
// Detect whether we're a top-level page or inside guide/.
// Works for both file:// and http(s):// because we look for the
diff --git a/pages/guide/building.html b/pages/guide/building.html
index 2ea77bc..e4c4944 100644
--- a/pages/guide/building.html
+++ b/pages/guide/building.html
@@ -182,45 +182,57 @@
Cross-compilation
Code size (.text section, compiled with -Os)
-
Sizes are for FR_math.c compiled with -Os -ffreestanding.
-Core = compiled with -DFR_CORE_ONLY (math only, no print, no waves).
+
Sizes are for FR_math.c compiled with -Os.
+Lean = -DFR_LEAN -DFR_NO_PRINT (radian trig, inv trig, log/exp, sqrt).
+Core = -DFR_CORE_ONLY (+ degree trig, BAM tan, log10, hypot).
+Full = all features (+ print, waves, ADSR).
With -ffunction-sections and linker --gc-sections,
only the functions your application references are linked, so real flash
usage will be smaller.
-
Target
Core
Full
+
Target
Lean
Core
Full
-
RP2040 (Cortex-M0+)
2.6 KB
4.2 KB
-
STM32 (Cortex-M4)
2.6 KB
4.2 KB
-
RISC-V 32 (rv32imac)
3.0 KB
4.7 KB
-
ESP32 (Xtensa)
3.5 KB
5.2 KB
-
68k
3.5 KB
5.3 KB
-
x86-64 (GCC)
3.5 KB
5.7 KB
-
x86-32
4.5 KB
6.8 KB
-
MSP430 (16-bit)
5.9 KB
8.9 KB
-
68HC11
10.8 KB
16.0 KB
-
AVR (ATmega328P)
7.0 KB
10.6 KB
+
Cortex-M4 (STM32)
3.3 KB
4.4 KB
5.5 KB
+
Cortex-M0 (RP2040)
3.4 KB
4.5 KB
5.7 KB
+
ARM Thumb
3.4 KB
4.7 KB
5.9 KB
+
RISC-V rv64
4.0 KB
5.5 KB
6.8 KB
+
RISC-V rv32
4.1 KB
5.5 KB
6.8 KB
+
Xtensa LX106 (ESP8266)
4.2 KB
5.8 KB
7.3 KB
+
ARM32
4.3 KB
5.8 KB
7.7 KB
+
68k
4.4 KB
6.2 KB
7.8 KB
+
x86-64 (GCC)
4.6 KB
6.1 KB
8.0 KB
+
AArch64 (ARM64)
4.8 KB
6.6 KB
8.7 KB
+
x86-32
5.3 KB
7.2 KB
9.2 KB
+
PowerPC
5.8 KB
8.0 KB
10.4 KB
+
MSP430 (16-bit)
7.8 KB
10.7 KB
12.8 KB
+
AVR (ATmega328P)
9.2 KB
12.8 KB
15.4 KB
+
68HC11
13.3 KB
18.4 KB
22.6 KB
Lean build options
-
Three compile-time #define guards let you strip optional subsystems
+
Compile-time #define guards let you strip optional subsystems
for ROM-constrained targets. Define them before including
FR_math.h (or pass -D on the compiler command line):
Copy src/FR_math.c, src/FR_math.h,
src/FR_defs.h (and optionally
- src/FR_math_2D.cpp, src/FR_math_2D.h,
- and src/FR_trig_table.h) into the target project, or
+ src/FR_math_2D.cpp, src/FR_math_2D.h)
+ into the target project, or
Add FR_Math as a git submodule and point the build system at
src/.
See the TDD
report for sweeps at radixes 8, 12, 16, and 24.
-
-
-
Function
Max err (%)*
Avg err (%)
Note
-
-
sin/cos (BAM)
0.4578
0.0076
fr_sin_bam/fr_cos_bam direct; 129-entry table
-
sin/cos (deg)
0.4578
0.0076
FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM
-
sin/cos (rad)
0.6104
0.0085
fr_sin/fr_cos via fr_rad_to_bam ±2π r16
-
tan (BAM)
0.5823
0.0008
fr_tan_bam 65536-pt full; ±maxint at poles
-
tan (deg)
0.5311
0.0008
FR_Tan ±360° s15.16 full; sat at poles
-
tan (rad)
13.4069
0.0029
fr_tan ±2π r16 full; sat at poles
-
asin / acos
0.8743
0.0301
65536-pt; sqrt approx near boundary
-
atan2
0.5100
0.0237
65536x5 radii; asin/acos+hypot_fast8
-
atan
0.3390
0.0154
20001-pt full sweep [-10,10]; via FR_atan2
-
sqrt
0.0239
0.0000
Round-to-nearest
-
log2
0.0286
0.0029
65-entry mantissa table
-
pow2
0.0019
0.0003
65-entry fraction table
-
ln, log10
0.0004
0.0000
Via FR_MULK28 from log2
-
exp
0.0003
0.0000
FR_MULK28 + FR_pow2
-
exp_fast
0.0009
0.0001
Shift-only scaling
-
pow10
0.0007
0.0000
FR_MULK28 + FR_pow2
-
pow10_fast
0.0028
0.0002
Shift-only scaling
-
hypot (exact)
0.0000
0.0000
64-bit intermediate
-
hypot_fast8 (8-seg)
0.0915
0.0320
Shift-only, no multiply
-
-
-
*Relative error; reference clamped to 1% of full-scale output.
+
+
+
Function
Max err (%)*
Avg err (%)
Note
+
+
sin/cos (BAM)
0.1526
0.0030
fr_sin_bam/fr_cos_bam direct; 129-entry table
+
sin/cos (deg)
0.1526
0.0029
FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM
+
sin/cos (rad)
0.1828
0.0033
fr_sin/fr_cos via fr_rad_to_bam ±2π r16
+
tan (BAM)
0.5823
0.0008
fr_tan_bam 65536-pt full; ±maxint at poles
+
tan (deg)
0.5311
0.0008
fr_tan_deg ±360° s15.16 full; sat at poles
+
tan (rad)
0.0386
0.0001
fr_tan ±2π r16; r24 pole bypass
+
asin / acos
0.7771
0.0280
65536-pt; sqrt approx near boundary
+
atan2
0.2564
0.0237
65536x5 radii; asin/acos+hypot_fast8
+
atan
0.2425
0.0155
20001-pt full sweep [-10,10]; via FR_atan2
+
sqrt
0.0000
0.0000
Round-to-nearest
+
log2
0.0116
0.0016
65-entry mantissa table
+
pow2
0.0018
0.0004
65-entry fraction table
+
ln, log10
0.0004
0.0000
Via FR_MULK28 from log2
+
exp
0.0003
0.0000
FR_MULK28 + FR_pow2
+
exp_fast
0.0009
0.0001
Shift-only scaling
+
pow10
0.0005
0.0000
FR_MULK28 + FR_pow2
+
pow10_fast
0.0022
0.0002
Shift-only scaling
+
hypot (exact)
0.0000
0.0000
64-bit intermediate
+
hypot_fast8 (8-seg)
0.0915
0.0320
Shift-only, no multiply
+
+
+
*Relative error; reference clamped to 1% of full-scale output.
What’s in the box
@@ -102,21 +102,24 @@
What’s in the box
Lean build options
-
Two compile-time #define guards let you strip optional subsystems
+
Compile-time #define guards let you strip optional subsystems
for ROM-constrained targets. Define them before including
FR_math.h (or pass -D on the compiler command line):
With both guards enabled the core math library (trig, inverse trig, log/exp,
-sqrt, hypot) compiles to ~3.5 KB on x86-64 / clang -Os. On Thumb-2 this
-would be roughly 2.6 KB.
+
FR_LEAN keeps only radian trig (sin, cos, tan), inverse trig,
+sqrt, log2, ln, exp, pow2, and arithmetic — comparable to libfixmath’s
+API but at 4.7 KB text vs libfixmath’s 4.9 KB + 112 KB BSS.
+With FR_LEAN + FR_NO_PRINT the library compiles to
+~4.7 KB on x86-64 / clang -Os.
/* Example: headless sensor node — math only, no print, no audio */
#define FR_NO_PRINT
@@ -237,7 +240,7 @@
Comparison
Multiply-free option
No
No
Yes (e.g. FR_EXP_FAST, FR_hypot_fast8)
Wave generators
No
No
6 shapes + ADSR
Dependencies
None
ARM only
None
-
Code size (Cortex-M0, -Os)
2.4 KB
~40 KB+
4.2 KB
+
Code size (Cortex-M0, -Os)
2.4 KB
~40 KB+
3.4 KB lean / 5.7 KB full
@@ -246,7 +249,7 @@
Comparison
FR_Math includes log/ln/log10, wave generators, ADSR, print helpers,
and variable radix. CMSIS-DSP estimate is for the math function subset
only. See
-docker/build_sizes.sh
+scripts/crossbuild-docker.sh
for the build script.