From 518093cc57c9bd0771bd07e673d5940e78cec8b3 Mon Sep 17 00:00:00 2001 From: deftio Date: Wed, 29 Apr 2026 19:33:28 -0700 Subject: [PATCH 1/7] updated bam fixes for sin/cos --- README.md | 16 +- compare_lfm/comparison_results.json | 479 ---------------------------- docker/build_sizes_compare.sh | 174 ++++++++++ docker/size_detail.sh | 102 ++++++ docs/README.md | 48 +-- docs/api-reference.md | 12 +- docs/building.md | 8 +- docs/examples.md | 6 +- docs/fixed-point-primer.md | 30 +- docs/getting-started.md | 2 +- docs/releases.md | 23 +- llms.txt | 3 +- pages/guide/api-reference.html | 14 +- pages/guide/building.html | 8 +- pages/guide/examples.html | 6 +- pages/guide/fixed-point-primer.html | 32 +- pages/guide/getting-started.html | 2 +- pages/index.html | 52 +-- pages/releases.html | 12 +- release_notes.md | 46 +++ src/FR_math.c | 198 +++++++----- src/FR_math.h | 172 ++++++---- src/FR_tan32.c | 282 ++++++++++++++++ src/FR_tan_table.h | 115 +++++++ src/FR_trig_table.h | 37 ++- tests/test_full_coverage.c | 93 +++++- tests/test_full_sweep.c | 346 ++++++++++++++++++++ tests/test_pole_table.c | 92 ++++++ tests/test_sweep_csv.c | 149 +++++++++ tests/test_tan32.c | 424 ++++++++++++++++++++++++ tests/test_tan32_peaks.c | 198 ++++++++++++ tests/test_tan32_sweep.c | 318 ++++++++++++++++++ tests/test_tdd.cpp | 323 ++++++++++++++++--- 33 files changed, 3023 insertions(+), 799 deletions(-) delete mode 100644 compare_lfm/comparison_results.json create mode 100755 docker/build_sizes_compare.sh create mode 100755 docker/size_detail.sh create mode 100644 src/FR_tan32.c create mode 100644 src/FR_tan_table.h create mode 100644 tests/test_full_sweep.c create mode 100644 tests/test_pole_table.c create mode 100644 tests/test_sweep_csv.c create mode 100644 tests/test_tan32.c create mode 100644 tests/test_tan32_peaks.c create mode 100644 tests/test_tan32_sweep.c diff --git a/README.md b/README.md index 42982fc..357bd5b 100644 --- a/README.md +++ b/README.md @@ -35,11 +35,15 @@ number of 
fractional bits available. All functions support radix 0 to 30. | Function | Max err (%) | Avg err (%) | Note | |---|---:|---:|---| -| sin / cos | 0.7169 | 0.0100 | 65536-pt sweep + specials | -| tan | 0.7118 | 0.0162 | 65536-pt sweep (skip poles) | -| asin / acos | 0.7025 | 0.0105 | 65536-pt; sqrt approx near boundary | -| atan2 | 0.4953 | 0.0268 | 65536x5 radii; asin/acos+hypot_fast8 | -| atan | 0.2985 | 0.0159 | 20001-pt sweep [-10,10]; via FR_atan2 | +| sin/cos (BAM) | 0.1646 | 0.0058 | 65536 BAM; 129-entry quadrant table | +| sin/cos (deg) | 0.5909 | 0.0091 | 65536-pt deg r7 + specials | +| sin/cos (rad) | 0.1646 | 0.0059 | 65536-pt rad r16 | +| tan (BAM) | 0.1704 | 0.0065 | 65536 BAM; 65-entry octant table | +| tan (deg) | 0.6000 | 0.0140 | 65536-pt deg r7 + specials | +| tan (rad) | 0.1704 | 0.0065 | 65536-pt rad r16 | +| asin / acos | 1.9776 | 0.0308 | 65536-pt; sqrt approx near boundary | +| atan2 | 0.4953 | 0.0238 | 65536x5 radii; asin/acos+hypot_fast8 | +| atan | 0.2985 | 0.0153 | 20001-pt sweep [-10,10]; via FR_atan2 | | sqrt | 0.0003 | 0.0000 | Round-to-nearest | | log2 | 0.2479 | 0.0045 | 65-entry mantissa table | | pow2 | 0.1373 | 0.0057 | 65-entry fraction table | @@ -59,7 +63,7 @@ number of fractional bits available. All functions support radix 0 to 30. 
| Arithmetic | `FR_ADD`, `FR_SUB`, `FR_DIV`, `FR_DIV32`, `FR_MOD`, `FR_FixMuls`, `FR_FixMulSat`, `FR_CHRDX` | | Utility | `FR_MIN`, `FR_MAX`, `FR_CLAMP`, `FR_ABS`, `FR_SGN` | | Trig (integer deg) | `FR_Sin`, `FR_Cos`, `FR_Tan`, `FR_SinI`, `FR_CosI`, `FR_TanI` | -| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_sin_deg`, `fr_cos_deg` | +| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam`, `fr_sin_deg`, `fr_cos_deg` | | Inverse trig | `FR_atan`, `FR_atan2`, `FR_asin`, `FR_acos` | | Log / exp | `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`, `FR_EXP_FAST`, `FR_POW10_FAST`, `FR_MULK28` | | Roots | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` | diff --git a/compare_lfm/comparison_results.json b/compare_lfm/comparison_results.json deleted file mode 100644 index adf0019..0000000 --- a/compare_lfm/comparison_results.json +++ /dev/null @@ -1,479 +0,0 @@ -{ - "description": "FR_math vs libfixmath benchmark — both measured against math.h double precision (IEEE 754)", - "gold_standard": " IEEE 754 double precision (~15 significant digits)", - "fixed_point_format": "Q16.16 (s15.16), 1 LSB = 1.52587890625000e-05", - "accuracy_points": 65536, - "timing_iterations": 100000, - "rel_error_threshold": 0.01, - "platform": "macOS ARM (Apple Silicon)", - "optimization": "-O2", - "results": [ - { - "function": "sin", - "double_reference": "std::sin", - "sweep": "65536-pt, [-pi, +pi]", - "speed": { - "fr_math_ns_per_call": 2.6, - "libfixmath_ns_per_call": 20.7, - "fr_math_speedup": 7.94, - "faster": "fr_math" - }, - "accuracy_vs_double": { - "fr_math": { - "max_abs_error": 1.34165039e-04, - "mean_abs_error": 4.23947344e-05, - "max_error_lsb": 8.8, - "mean_error_lsb": 2.8, - "max_rel_error_pct": 1.0615, - "mean_rel_error_pct": 0.0158 - }, - "libfixmath": { - "max_abs_error": 7.74511497e-03, - "mean_abs_error": 5.34549003e-04, - "max_error_lsb": 507.6, - "mean_error_lsb": 35.0, - "max_rel_error_pct": 
74.5513, - "mean_rel_error_pct": 0.6105 - }, - "closer_to_double": "fr_math" - } - }, - { - "function": "cos", - "double_reference": "std::cos", - "sweep": "65536-pt, [-pi, +pi]", - "speed": { - "fr_math_ns_per_call": 4.8, - "libfixmath_ns_per_call": 18.4, - "fr_math_speedup": 3.86, - "faster": "fr_math" - }, - "accuracy_vs_double": { - "fr_math": { - "max_abs_error": 1.25349009e-04, - "mean_abs_error": 4.65658208e-05, - "max_error_lsb": 8.2, - "mean_error_lsb": 3.1, - "max_rel_error_pct": 0.9018, - "mean_rel_error_pct": 0.0161 - }, - "libfixmath": { - "max_abs_error": 7.75591931e-03, - "mean_abs_error": 5.36939114e-04, - "max_error_lsb": 508.3, - "mean_error_lsb": 35.2, - "max_rel_error_pct": 74.4001, - "mean_rel_error_pct": 0.6121 - }, - "closer_to_double": "fr_math" - } - }, - { - "function": "tan", - "double_reference": "std::tan", - "sweep": "65536-pt, [-1.2, 1.2] rad", - "speed": { - "fr_math_ns_per_call": 6.0, - "libfixmath_ns_per_call": 41.4, - "fr_math_speedup": 6.89, - "faster": "fr_math" - }, - "accuracy_vs_double": { - "fr_math": { - "max_abs_error": 8.49384425e-04, - "mean_abs_error": 1.04510886e-04, - "max_error_lsb": 55.7, - "mean_error_lsb": 6.8, - "max_rel_error_pct": 1.0080, - "mean_rel_error_pct": 0.0228 - }, - "libfixmath": { - "max_abs_error": 1.82495961e-02, - "mean_abs_error": 8.01092905e-04, - "max_error_lsb": 1196.0, - "mean_error_lsb": 52.5, - "max_rel_error_pct": 0.7099, - "mean_rel_error_pct": 0.0410 - }, - "closer_to_double": "fr_math" - }, - "note": "Skip near pi/2" - }, - { - "function": "asin", - "double_reference": "std::asin", - "sweep": "65536-pt, [-0.999, 0.999]", - "speed": { - "fr_math_ns_per_call": 11.5, - "libfixmath_ns_per_call": 53.7, - "fr_math_speedup": 4.67, - "faster": "fr_math" - }, - "accuracy_vs_double": { - "fr_math": { - "max_abs_error": 4.76933520e-04, - "mean_abs_error": 4.37641042e-05, - "max_error_lsb": 31.3, - "mean_error_lsb": 2.9, - "max_rel_error_pct": 0.5795, - "mean_rel_error_pct": 0.0134 - }, - 
"libfixmath": { - "max_abs_error": 1.01788963e-02, - "mean_abs_error": 3.64421558e-03, - "max_error_lsb": 667.1, - "mean_error_lsb": 238.8, - "max_rel_error_pct": 20.1233, - "mean_rel_error_pct": 2.4452 - }, - "closer_to_double": "fr_math" - } - }, - { - "function": "acos", - "double_reference": "std::acos", - "sweep": "65536-pt, [-0.999, 0.999]", - "speed": { - "fr_math_ns_per_call": 8.4, - "libfixmath_ns_per_call": 50.4, - "fr_math_speedup": 5.97, - "faster": "fr_math" - }, - "accuracy_vs_double": { - "fr_math": { - "max_abs_error": 4.72479065e-04, - "mean_abs_error": 4.33857475e-05, - "max_error_lsb": 31.0, - "mean_error_lsb": 2.8, - "max_rel_error_pct": 0.5194, - "mean_rel_error_pct": 0.0056 - }, - "libfixmath": { - "max_abs_error": 1.01897006e-02, - "mean_abs_error": 3.64422377e-03, - "max_error_lsb": 667.8, - "mean_error_lsb": 238.8, - "max_rel_error_pct": 15.3142, - "mean_rel_error_pct": 0.3475 - }, - "closer_to_double": "fr_math" - } - }, - { - "function": "atan", - "double_reference": "std::atan", - "sweep": "65536-pt, [-50, 50]", - "speed": { - "fr_math_ns_per_call": 8.0, - "libfixmath_ns_per_call": 11.2, - "fr_math_speedup": 1.41, - "faster": "fr_math" - }, - "accuracy_vs_double": { - "fr_math": { - "max_abs_error": 9.57408985e-04, - "mean_abs_error": 7.37662492e-05, - "max_error_lsb": 62.7, - "mean_error_lsb": 4.8, - "max_rel_error_pct": 0.2149, - "mean_rel_error_pct": 0.0061 - }, - "libfixmath": { - "max_abs_error": 1.01676134e-02, - "mean_abs_error": 6.15802358e-03, - "max_error_lsb": 666.3, - "mean_error_lsb": 403.6, - "max_rel_error_pct": 19.8632, - "mean_rel_error_pct": 0.4571 - }, - "closer_to_double": "fr_math" - } - }, - { - "function": "atan2", - "double_reference": "std::atan2", - "sweep": "65536-pt, 5 radii x 360 deg", - "speed": { - "fr_math_ns_per_call": 15.9, - "libfixmath_ns_per_call": 10.5, - "fr_math_speedup": 0.66, - "faster": "libfixmath" - }, - "accuracy_vs_double": { - "fr_math": { - "max_abs_error": 9.70679332e-04, - 
"mean_abs_error": 2.15170870e-04, - "max_error_lsb": 63.6, - "mean_error_lsb": 14.1, - "max_rel_error_pct": 0.4122, - "mean_rel_error_pct": 0.0258 - }, - "libfixmath": { - "max_abs_error": 1.01728729e-02, - "mean_abs_error": 3.88005371e-03, - "max_error_lsb": 666.7, - "mean_error_lsb": 254.3, - "max_rel_error_pct": 20.0045, - "mean_rel_error_pct": 0.9267 - }, - "closer_to_double": "fr_math" - }, - "note": "All 4 quadrants" - }, - { - "function": "sqrt", - "double_reference": "std::sqrt", - "sweep": "65536-pt, [0.01, 100]", - "speed": { - "fr_math_ns_per_call": 18.6, - "libfixmath_ns_per_call": 19.8, - "fr_math_speedup": 1.06, - "faster": "fr_math" - }, - "accuracy_vs_double": { - "fr_math": { - "max_abs_error": 7.62924903e-06, - "mean_abs_error": 3.80582266e-06, - "max_error_lsb": 0.5, - "mean_error_lsb": 0.2, - "max_rel_error_pct": 0.0062, - "mean_rel_error_pct": 0.0001 - }, - "libfixmath": { - "max_abs_error": 7.62924903e-06, - "mean_abs_error": 3.80582266e-06, - "max_error_lsb": 0.5, - "mean_error_lsb": 0.2, - "max_rel_error_pct": 0.0062, - "mean_rel_error_pct": 0.0001 - }, - "closer_to_double": "tie" - } - }, - { - "function": "exp", - "double_reference": "std::exp", - "sweep": "65536-pt, [-5, 5]", - "speed": { - "fr_math_ns_per_call": 3.1, - "libfixmath_ns_per_call": 67.6, - "fr_math_speedup": 22.02, - "faster": "fr_math" - }, - "accuracy_vs_double": { - "fr_math": { - "max_abs_error": 3.17909587e-03, - "mean_abs_error": 1.03218909e-04, - "max_error_lsb": 208.3, - "mean_error_lsb": 6.8, - "max_rel_error_pct": 0.1486, - "mean_rel_error_pct": 0.0078 - }, - "libfixmath": { - "max_abs_error": 3.30095957e-03, - "mean_abs_error": 9.38398029e-05, - "max_error_lsb": 216.3, - "mean_error_lsb": 6.1, - "max_rel_error_pct": 0.0756, - "mean_rel_error_pct": 0.0042 - }, - "closer_to_double": "fr_math" - } - }, - { - "function": "ln", - "double_reference": "std::log", - "sweep": "65536-pt, [0.01, 100]", - "speed": { - "fr_math_ns_per_call": 8.8, - "libfixmath_ns_per_call": 
479.3, - "fr_math_speedup": 54.70, - "faster": "fr_math" - }, - "accuracy_vs_double": { - "fr_math": { - "max_abs_error": 4.93278555e-05, - "mean_abs_error": 1.61117669e-05, - "max_error_lsb": 3.2, - "mean_error_lsb": 1.1, - "max_rel_error_pct": 0.3012, - "mean_rel_error_pct": 0.0006 - }, - "libfixmath": { - "max_abs_error": 3.40447818e-05, - "mean_abs_error": 5.14211182e-06, - "max_error_lsb": 2.2, - "mean_error_lsb": 0.3, - "max_rel_error_pct": 0.0557, - "mean_rel_error_pct": 0.0002 - }, - "closer_to_double": "libfixmath" - } - }, - { - "function": "log2", - "double_reference": "std::log2", - "sweep": "65536-pt, [0.01, 100]", - "speed": { - "fr_math_ns_per_call": 8.7, - "libfixmath_ns_per_call": 39.4, - "fr_math_speedup": 4.55, - "faster": "fr_math" - }, - "accuracy_vs_double": { - "fr_math": { - "max_abs_error": 6.06739329e-05, - "mean_abs_error": 2.30368713e-05, - "max_error_lsb": 4.0, - "mean_error_lsb": 1.5, - "max_rel_error_pct": 0.4945, - "mean_rel_error_pct": 0.0006 - }, - "libfixmath": { - "max_abs_error": 3.56826644e-05, - "mean_abs_error": 9.96190621e-06, - "max_error_lsb": 2.3, - "mean_error_lsb": 0.7, - "max_rel_error_pct": 0.1758, - "mean_rel_error_pct": 0.0002 - }, - "closer_to_double": "libfixmath" - } - }, - { - "function": "mul", - "double_reference": "double a*b", - "sweep": "65536-pt, a in [-50,50], b in [-2,2]", - "speed": { - "fr_math_ns_per_call": 0.9, - "libfixmath_ns_per_call": 1.2, - "fr_math_speedup": 1.33, - "faster": "fr_math" - }, - "accuracy_vs_double": { - "fr_math": { - "max_abs_error": 7.62939453e-06, - "mean_abs_error": 3.81535541e-06, - "max_error_lsb": 0.5, - "mean_error_lsb": 0.3, - "max_rel_error_pct": 0.0692, - "mean_rel_error_pct": 0.0004 - }, - "libfixmath": { - "max_abs_error": 7.62939453e-06, - "mean_abs_error": 3.81535541e-06, - "max_error_lsb": 0.5, - "mean_error_lsb": 0.3, - "max_rel_error_pct": 0.0692, - "mean_rel_error_pct": 0.0004 - }, - "closer_to_double": "tie" - } - }, - { - "function": "div", - 
"double_reference": "double a/b", - "sweep": "65536-pt, a/b in [-50,50]/[0.5,50]", - "speed": { - "fr_math_ns_per_call": 0.9, - "libfixmath_ns_per_call": 5.2, - "fr_math_speedup": 5.98, - "faster": "fr_math" - }, - "accuracy_vs_double": { - "fr_math": { - "max_abs_error": 7.62927377e-06, - "mean_abs_error": 3.82182808e-06, - "max_error_lsb": 0.5, - "mean_error_lsb": 0.3, - "max_rel_error_pct": 0.0727, - "mean_rel_error_pct": 0.0010 - }, - "libfixmath": { - "max_abs_error": 8.37162948e-06, - "mean_abs_error": 3.82625614e-06, - "max_error_lsb": 0.5, - "mean_error_lsb": 0.3, - "max_rel_error_pct": 0.0727, - "mean_rel_error_pct": 0.0010 - }, - "closer_to_double": "fr_math" - }, - "note": "Both use 64-bit intermediate" - }, - { - "function": "hypot", - "double_reference": "std::hypot", - "sweep": "65536-pt, 5 radii x 360 deg", - "speed": { - "fr_math_ns_per_call": 20.0 - }, - "accuracy_vs_double": { - "fr_math": { - "max_abs_error": 7.62930188e-06, - "mean_abs_error": 3.67171926e-06, - "max_error_lsb": 0.5, - "mean_error_lsb": 0.2, - "max_rel_error_pct": 0.0076, - "mean_rel_error_pct": 0.0009 - } - }, - "note": "FR_math only (libfixmath has no hypot)" - }, - { - "function": "hypot_fast8", - "double_reference": "std::hypot", - "sweep": "65536-pt, 5 radii x 360 deg", - "speed": { - "fr_math_ns_per_call": 2.4 - }, - "accuracy_vs_double": { - "fr_math": { - "max_abs_error": 1.37244198e+00, - "mean_abs_error": 1.13634634e-01, - "max_error_lsb": 89944.4, - "mean_error_lsb": 7447.2, - "max_rel_error_pct": 0.1372, - "mean_rel_error_pct": 0.0516 - } - }, - "note": "FR_math only; shift-only, no multiply" - } - ], - "summary": { - "head_to_head_functions": 13, - "faster_wins": { "fr_math": 12, "libfixmath": 1 }, - "accuracy_wins": { "fr_math": 9, "libfixmath": 2, "tie": 2 }, - "total_functions_tested": 15 - }, - "notes": [ - "All accuracy measured vs IEEE 754 double. Lower = closer to perfect.", - "LSB = Q16.16 least-significant-bit = 1.53e-5. 
Best possible = 0.5 LSB.", - "Percent errors skip |ref| < 0.01 to avoid near-zero division spikes.", - "Both libraries use Q16.16 (s15.16): 1.0 = 65536.", - "FR_math trig: BAM + 129-entry LUT + linear interpolation.", - "libfixmath trig: parabolic approximation + 5th-order correction.", - "Timing: min of 3 passes x 100000 calls; cache-warm.", - "Speedup > 1.0 means FR_math is faster by that factor." - ], - "compiled_size_note": "Run 'make size' in .compare/ for live numbers. The values below are representative.", - "compiled_size": { - "compiler": "clang -O2 (macOS ARM)", - "fr_math": { - "files": "FR_math.c (single file)", - "functions": "trig(6), inv-trig(4), log/ln/log10, exp/pow2/pow10, exp_fast/pow10_fast, sqrt, hypot(2), waves(6), ADSR(4), print(4), format", - "rom_bytes": 7470, - "ram_bss_bytes": 0, - "note": "All tables in const ROM. Zero runtime allocation." - }, - "libfixmath": { - "files": "fix16.c, fix16_sqrt.c, fix16_exp.c, fix16_trig.c, fix16_str.c, uint32.c, fract32.c", - "functions": "trig(6), inv-trig(4), log/log2, exp, sqrt, mul/div, str", - "rom_bytes": 4912, - "ram_bss_bytes": 114688, - "rom_bytes_no_cache": 5476, - "ram_bss_bytes_no_cache": 0, - "note": "Default mode caches 112 KB of sin/exp LUTs in BSS. FIXMATH_NO_CACHE eliminates RAM but recomputes per call." - } - } -} diff --git a/docker/build_sizes_compare.sh b/docker/build_sizes_compare.sh new file mode 100755 index 0000000..940de5f --- /dev/null +++ b/docker/build_sizes_compare.sh @@ -0,0 +1,174 @@ +#!/usr/bin/env bash +# +# build_sizes_compare.sh — cross-compile FR_math.c with and without FR_tan32.c +# for every supported target, and report the size delta. 
+# +# Run inside the Docker container: +# docker run --rm -v $(pwd):/src fr-math-sizes bash /src/docker/build_sizes_compare.sh + +set -euo pipefail + +SRC_OLD="/src/src/FR_math.c" +SRC_NEW="/src/src/FR_tan32.c" +INC="-I/src/src" +OUT="/src/build/size_compare" + +mkdir -p "${OUT}" + +# ── helpers ──────────────────────────────────────────────────────────── + +# get_text_size +# Compiles source(s) to .o files, sums .text sections. +get_text_size() { + local label="$1"; shift + local cc="$1"; shift + local sz_cmd="$1"; shift + local flags="$1"; shift + # remaining args are source files + + if ! command -v "${cc}" >/dev/null 2>&1; then + echo "n/a" + return + fi + + local total=0 + for src in "$@"; do + local bname + bname=$(basename "${src}" .c) + local obj="${OUT}/${label}_${bname}.o" + if ! ${cc} ${flags} ${INC} -std=c99 -Wall -Os -ffreestanding \ + -c "${src}" -o "${obj}" 2>/dev/null; then + echo "fail" + return + fi + local text + text=$(${sz_cmd} --format=berkeley "${obj}" 2>/dev/null | tail -1 | awk '{print $1}') + total=$((total + text)) + done + echo "${total}" +} + +# resolve_size_tool: given a compiler path, find the matching size binary +resolve_size_tool() { + local cc="$1" + local prefix="${cc%-gcc*}" + prefix="${prefix%-gcc-*}" + if [[ "${prefix}" != "${cc}" ]] && command -v "${prefix}-size" >/dev/null 2>&1; then + echo "${prefix}-size" + else + echo "size" + fi +} + +# ── target definitions ──────────────────────────────────────────────── + +declare -a T_NAMES T_CCS T_SZ T_FLAGS + +add() { + T_NAMES+=("$1") + T_CCS+=("$2") + T_SZ+=("$(resolve_size_tool "$2")") + T_FLAGS+=("$3") +} + +# ARM +add "RP2040 (Cortex-M0+)" arm-none-eabi-gcc "-mcpu=cortex-m0plus -mthumb" +add "STM32 (Cortex-M4)" arm-none-eabi-gcc "-mcpu=cortex-m4 -mthumb -mfloat-abi=soft" +add "Cortex-M0 (Thumb-1)" arm-none-eabi-gcc "-mcpu=cortex-m0 -mthumb" + +# RISC-V +add "RISC-V 32 (rv32im)" riscv64-unknown-elf-gcc "-march=rv32im -mabi=ilp32" + +# Xtensa (ESP32) +add "ESP32 (Xtensa)" 
xtensa-esp-elf-gcc "" + +# 68k +add "68k" m68k-linux-gnu-gcc-12 "" + +# x86 +add "x86-32" gcc "-m32" +add "x86-64" gcc "-m64" + +# MSP430 (16-bit, no stdint) +add "MSP430" msp430-elf-gcc "-mmcu=msp430f5529 -DFR_NO_STDINT" + +# 68HC11 (8-bit) +add "68HC11" m68hc11-gcc "-DFR_NO_STDINT" + +# ── compile ──────────────────────────────────────────────────────────── + +echo "" +echo "FR_Math cross-platform size comparison: OLD vs OLD+NEW tan32" +echo "Date: $(date -u '+%Y-%m-%d %H:%M UTC')" +echo "" + +declare -a R_OLD R_NEW + +for i in "${!T_NAMES[@]}"; do + label="${T_NAMES[$i]}" + cc="${T_CCS[$i]}" + sz="${T_SZ[$i]}" + flags="${T_FLAGS[$i]}" + + tag=$(echo "${label}" | tr ' ()/' '____') + + old=$(get_text_size "${tag}_old" "${cc}" "${sz}" "${flags}" "${SRC_OLD}") + new=$(get_text_size "${tag}_new" "${cc}" "${sz}" "${flags}" "${SRC_OLD}" "${SRC_NEW}") + + R_OLD+=("${old}") + R_NEW+=("${new}") + + echo " ${label}: old=${old} old+new=${new}" +done + +# ── output table ─────────────────────────────────────────────────────── + +echo "" +echo "## FR_Math size: Old vs Old + 32-bit LUT tan (\`-Os -ffreestanding\`)" +echo "" +printf "| %-26s | %10s | %10s | %10s | %6s |\n" "Target" "Old (text)" "w/ tan32" "Delta" "Delta%" +printf "| %-26s | %10s | %10s | %10s | %6s |\n" "--------------------------" "----------" "----------" "----------" "------" + +for i in "${!T_NAMES[@]}"; do + old="${R_OLD[$i]}" + new="${R_NEW[$i]}" + + if [[ "${old}" =~ ^[0-9]+$ ]] && [[ "${new}" =~ ^[0-9]+$ ]]; then + delta=$((new - old)) + pct=$(awk "BEGIN { printf \"%.1f\", 100.0*${delta}/${old} }") + printf "| %-26s | %8s B | %8s B | %+8d B | %5s%% |\n" \ + "${T_NAMES[$i]}" "${old}" "${new}" "${delta}" "${pct}" + else + printf "| %-26s | %10s | %10s | %10s | %6s |\n" \ + "${T_NAMES[$i]}" "${old}" "${new}" "—" "—" + fi +done + +echo "" +echo "Old = FR_math.c only (contains existing tan/atan)." +echo "w/ tan32 = FR_math.c + FR_tan32.c (adds new 32-bit LUT tan/atan alongside old)." 
+echo "Delta = additional bytes from FR_tan32.c (new functions + 129-entry u32 table)." +echo "" + +# ── per-function breakdown (x86-64) ─────────────────────────────────── + +echo "### Per-function breakdown (x86-64, GCC -Os)" +echo "" + +obj_old="${OUT}/x86-64_old_FR_math.o" +obj_new="${OUT}/x86-64_new_FR_tan32.o" + +if [[ -f "${obj_old}" ]] && [[ -f "${obj_new}" ]]; then + echo "**Old tan/atan in FR_math.o:**" + echo '```' + nm "${obj_old}" -n -S --size-sort -f sysv -t d 2>/dev/null | grep -iE "tan|atan" || true + echo '```' + echo "" + echo "**New in FR_tan32.o:**" + echo '```' + nm "${obj_new}" -n -S --size-sort -f sysv -t d 2>/dev/null | grep -E "FUNC" || true + echo '```' +fi + +echo "" +echo "Done." diff --git a/docker/size_detail.sh b/docker/size_detail.sh new file mode 100755 index 0000000..19677e5 --- /dev/null +++ b/docker/size_detail.sh @@ -0,0 +1,102 @@ +#!/usr/bin/env bash +set -euo pipefail + +INC="-I/src/src" +FLAGS="-std=c99 -Wall -Os -ffreestanding" +OUT=/tmp/sz +mkdir -p "${OUT}" + +do_platform() { + local label="$1" + local cc="$2" + local flags="$3" + + if ! 
command -v "${cc}" >/dev/null 2>&1; then + return + fi + + # Resolve size and nm tools + local sz_cmd="size" + local nm_cmd="nm" + local prefix="${cc%-gcc*}" + if [ "${prefix}" != "${cc}" ]; then + command -v "${prefix}-size" >/dev/null 2>&1 && sz_cmd="${prefix}-size" + command -v "${prefix}-nm" >/dev/null 2>&1 && nm_cmd="${prefix}-nm" + fi + + # Compile + ${cc} ${FLAGS} ${flags} ${INC} -c /src/src/FR_math.c -o "${OUT}/old.o" 2>/dev/null || return + ${cc} ${FLAGS} ${flags} ${INC} -c /src/src/FR_tan32.c -o "${OUT}/new.o" 2>/dev/null || return + + local old_text new_text + old_text=$(${sz_cmd} --format=berkeley "${OUT}/old.o" | tail -1 | awk '{print $1}') + new_text=$(${sz_cmd} --format=berkeley "${OUT}/new.o" | tail -1 | awk '{print $1}') + + # Sum old tan/atan function sizes from nm -S + local old_tan_total=0 + while IFS=' ' read -r addr size typ name; do + if [ -n "${size}" ]; then + dec_size=$((16#${size})) + old_tan_total=$((old_tan_total + dec_size)) + fi + done < <(${nm_cmd} -n -S --defined-only "${OUT}/old.o" 2>/dev/null \ + | grep -E " [tT] " | grep -iE "tan|atan" || true) + + local replace_delta=$((new_text - old_tan_total)) + local new_total=$((old_text - old_tan_total + new_text)) + + printf "| %-26s | %6s | %6s | %6s | %6s | %+6d |\n" \ + "${label}" "${old_text}" "${old_tan_total}" "${new_text}" "${new_total}" "${replace_delta}" + + rm -f "${OUT}/old.o" "${OUT}/new.o" +} + +echo "" +echo "## FR_Math: Old vs Replacement size (new tan32 replaces old tan/atan)" +echo "" +printf "| %-26s | %6s | %6s | %6s | %6s | %6s |\n" \ + "Target" "Old" "OldT/A" "New" "Repl" "Delta" +printf "| %-26s | %6s | %6s | %6s | %6s | %6s |\n" \ + "--------------------------" "------" "------" "------" "------" "------" + +do_platform "RP2040 (Cortex-M0+)" arm-none-eabi-gcc "-mcpu=cortex-m0plus -mthumb" +do_platform "STM32 (Cortex-M4)" arm-none-eabi-gcc "-mcpu=cortex-m4 -mthumb -mfloat-abi=soft" +do_platform "Cortex-M0 (Thumb-1)" arm-none-eabi-gcc "-mcpu=cortex-m0 -mthumb" 
+do_platform "RISC-V 32 (rv32im)" riscv64-unknown-elf-gcc "-march=rv32im -mabi=ilp32" +do_platform "ESP32 (Xtensa)" xtensa-esp-elf-gcc "" +do_platform "68k" m68k-linux-gnu-gcc-12 "" +do_platform "x86-32" gcc "-m32" +do_platform "x86-64" gcc "-m64" +do_platform "MSP430" msp430-elf-gcc "-mmcu=msp430f5529 -DFR_NO_STDINT" + +echo "" +echo "Old = FR_math.c total .text" +echo "OldT/A = old tan+atan functions within FR_math.o (would be removed)" +echo "New = FR_tan32.c total .text (replacement functions + 129-entry u32 table)" +echo "Repl = library size after replacement (Old - OldT/A + New)" +echo "Delta = New - OldT/A (net change from replacement)" + +# === x86-64 per-function detail === +echo "" +echo "### x86-64 per-function detail" +echo "" + +gcc ${FLAGS} -m64 ${INC} -c /src/src/FR_math.c -o "${OUT}/old.o" 2>/dev/null +gcc ${FLAGS} -m64 ${INC} -c /src/src/FR_tan32.c -o "${OUT}/new.o" 2>/dev/null + +echo "**Old tan/atan functions in FR_math.o:**" +echo '```' +nm -n -S --defined-only "${OUT}/old.o" | grep -E " [tT] " | grep -iE "tan|atan" | \ +while IFS=' ' read -r addr size typ name; do + printf " %-30s %d bytes\n" "${name}" "$((16#${size}))" +done +echo '```' + +echo "" +echo "**New functions in FR_tan32.o:**" +echo '```' +nm -n -S --defined-only "${OUT}/new.o" | grep -E " [tT] " | \ +while IFS=' ' read -r addr size typ name; do + printf " %-30s %d bytes\n" "${name}" "$((16#${size}))" +done +echo '```' diff --git a/docs/README.md b/docs/README.md index 0e1dd1a..c88b451 100644 --- a/docs/README.md +++ b/docs/README.md @@ -16,7 +16,7 @@ into a single format. Tested on gcc, clang, MSVC, IAR, Keil, sdcc, AVR-gcc, MSP430-gcc, RISC-V toolchains, and Arduino. - Zero dependencies beyond `<stdint.h>`. -- Parameterised radix: every function takes the binary point as an +- Parameterized radix: every function takes the binary point as an argument, so you choose how many fractional bits you need per call. 
- Deterministic, bounded error — every public symbol has a documented worst case in the [API reference](api-reference.md). @@ -33,7 +33,7 @@ or any tooling. If you want the browser version, look in | --- | --- | | [getting-started.md](getting-started.md) | Clone, build, run your first FR_Math program. | | [fixed-point-primer.md](fixed-point-primer.md) | Why fixed-point exists, sM.N notation, operations, how to pick a radix. | -| [api-reference.md](api-reference.md) | Every public symbol: signature, radix, precision, error behaviour. | +| [api-reference.md](api-reference.md) | Every public symbol: signature, radix, precision, error behavior. | | [examples.md](examples.md) | Runnable snippets: trig, log, waves, ADSR, 2D transforms. | | [building.md](building.md) | Makefile, scripts, test suite, coverage, cross-compilation. | | [releases.md](releases.md) | Release history with per-version highlights and breaking changes. | @@ -45,24 +45,28 @@ radix — Q16.16 is just the reference point for the table. See the [TDD report](../build/test_tdd_report.md) for sweeps at radixes 8, 12, 16, and 24. Percent errors skip expected values near zero (|expected| < 0.01). 
- -| Function | Max err (%) | Avg err (%) | Note | -|---|---:|---:|---| -| sin / cos | 0.7169 | 0.0100 | 65536-pt sweep + specials | -| tan | 0.7118 | 0.0162 | 65536-pt sweep (skip poles) | -| asin / acos | 0.7025 | 0.0105 | 65536-pt; sqrt approx near boundary | -| atan2 | 0.4953 | 0.0268 | 65536x5 radii; asin/acos+hypot_fast8 | -| atan | 0.2985 | 0.0159 | 20001-pt sweep [-10,10]; via FR_atan2 | -| sqrt | 0.0003 | 0.0000 | Round-to-nearest | -| log2 | 0.2479 | 0.0045 | 65-entry mantissa table | -| pow2 | 0.1373 | 0.0057 | 65-entry fraction table | -| ln, log10 | 0.0015 | 0.0004 | Via FR_MULK28 from log2 | -| exp | 0.0719 | 0.0051 | FR_MULK28 + FR_pow2 | -| exp_fast | 0.0719 | 0.0064 | Shift-only scaling | -| pow10 | 0.1163 | 0.0075 | FR_MULK28 + FR_pow2 | -| pow10_fast | 0.1163 | 0.0100 | Shift-only scaling | -| hypot (exact) | 0.0001 | 0.0000 | 64-bit intermediate | -| hypot_fast8 (8-seg) | 0.0977 | 0.0508 | Shift-only, no multiply | + +| Function | Max err (%) | Avg err (%) | Note | +|---|---:|---:|---| +| sin/cos (BAM) | 0.1646 | 0.0058 | 65536 BAM; 129-entry quadrant table | +| sin/cos (deg) | 0.5909 | 0.0091 | 65536-pt deg r7 + specials | +| sin/cos (rad) | 0.1646 | 0.0059 | 65536-pt rad r16 | +| tan (BAM) | 0.1704 | 0.0065 | 65536 BAM; 65-entry octant table | +| tan (deg) | 0.6000 | 0.0140 | 65536-pt deg r7 + specials | +| tan (rad) | 0.1704 | 0.0065 | 65536-pt rad r16 | +| asin / acos | 1.9776 | 0.0308 | 65536-pt; sqrt approx near boundary | +| atan2 | 0.4953 | 0.0238 | 65536x5 radii; asin/acos+hypot_fast8 | +| atan | 0.2985 | 0.0153 | 20001-pt sweep [-10,10]; via FR_atan2 | +| sqrt | 0.0003 | 0.0000 | Round-to-nearest | +| log2 | 0.2479 | 0.0045 | 65-entry mantissa table | +| pow2 | 0.1373 | 0.0057 | 65-entry fraction table | +| ln, log10 | 0.0015 | 0.0004 | Via FR_MULK28 from log2 | +| exp | 0.0719 | 0.0051 | FR_MULK28 + FR_pow2 | +| exp_fast | 0.0719 | 0.0064 | Shift-only scaling | +| pow10 | 0.1163 | 0.0075 | FR_MULK28 + FR_pow2 | +| pow10_fast | 0.1163 
| 0.0100 | Shift-only scaling | +| hypot (exact) | 0.0001 | 0.0000 | 64-bit intermediate | +| hypot_fast8 (8-seg) | 0.0977 | 0.0508 | Shift-only, no multiply | ## What's in the box @@ -72,7 +76,7 @@ radix — Q16.16 is just the reference point for the table. See the | Arithmetic | `FR_ADD`, `FR_SUB`, `FR_DIV`, `FR_DIV32`, `FR_MOD`, `FR_FixMuls`, `FR_FixMulSat`, `FR_CHRDX` | | Utility | `FR_MIN`, `FR_MAX`, `FR_CLAMP`, `FR_ABS`, `FR_SGN` | | Trig (integer deg) | `FR_Sin`, `FR_Cos`, `FR_Tan`, `FR_SinI`, `FR_CosI`, `FR_TanI` | -| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_sin_deg`, `fr_cos_deg` | +| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam`, `fr_sin_deg`, `fr_cos_deg` | | Inverse trig | `FR_atan`, `FR_atan2`, `FR_asin`, `FR_acos` | | Log / exp | `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`, `FR_EXP_FAST`, `FR_POW10_FAST`, `FR_MULK28` | | Roots | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` | @@ -118,7 +122,7 @@ pays off: - **8- and 16-bit MCUs** (AVR, MSP430, 8051, sdcc) where the FPU does not exist and even software float is too slow or too large. -- **Hot inner loops on any CPU** where a parameterised-radix integer +- **Hot inner loops on any CPU** where a parameterized-radix integer multiply is faster and more deterministic than a `float`. Think DSP taps, PID loops, coordinate transforms inside a scanline renderer. - **Bit-exact reproducibility** across compilers, architectures, and diff --git a/docs/api-reference.md b/docs/api-reference.md index 3f97f20..1ec3742 100644 --- a/docs/api-reference.md +++ b/docs/api-reference.md @@ -1,7 +1,7 @@ # API Reference Every public symbol, grouped by topic. Each entry lists the radix -convention, the precision, and the error / saturation behaviour. All +convention, the precision, and the error / saturation behavior. 
All types are from `FR_defs.h`: `s8 s16 s32 s64` for signed and `u8 u16 u32 u64` for unsigned integers (these are aliases for the `<stdint.h>` types). @@ -12,7 +12,7 @@ Most entries list **inputs**, **output**, **radix handling** and **precision** separately, because in a mixed-radix library those four things are what actually lets you plan an arithmetic pipeline without hidden -quantisation. If you are new to fixed-point, the +quantization. If you are new to fixed-point, the [Fixed-Point Primer](fixed-point-primer.md) explains the notation first; come back here once you're comfortable reading `s15.16` and `s0.15`. @@ -118,7 +118,7 @@ so call sites read as intent: | --- | --- | --- | --- | | `I2FR(i, r)` | `i`: integer; `r`: target radix in bits | `s32` at radix `r` | `(i) << (r)`. No bounds check. Use when you know `|i|` fits in `32 − r` signed bits. | | `FR2I(x, r)` | `x`: fixed-point at radix `r` | integer | `(x) >> (r)`. Truncates toward **−∞** (C's signed shift). `FR2I(-1, 4) == -1`, not 0. | -| `FR_INT(x, r)` | `x`: fixed-point at radix `r` | integer | Truncates toward **zero**. `FR_INT(-1, 4) == 0`. Useful when you want C's normal integer-cast behaviour. | +| `FR_INT(x, r)` | `x`: fixed-point at radix `r` | integer | Truncates toward **zero**. `FR_INT(-1, 4) == 0`. Useful when you want C's normal integer-cast behavior. | | `FR_NUM(i, f, d, r)` | `i`: integer part; `f`: decimal fraction digits; `d`: number of digits in `f`; `r`: target radix | `s32` at radix `r` | Build a fixed-point literal from decimal. `FR_NUM(12, 34, 2, 10)` is 12.34 at s.10. Rounds toward zero; for round-to-nearest, add half an LSB at the call site. | | `FR_numstr(s, r)` | `s`: null-terminated decimal string (e.g. `"3.14159"`); `r`: target radix | `s32` at radix `r` | Runtime string-to-fixed-point parser (inverse of `FR_printNumF`). Handles signs, leading whitespace, and leading-zero fractions like `"0.05"`. Up to 9 fractional digits. No malloc, no strtod, no libm. 
Returns 0 for NULL or empty input. | | `FR2D(x, r)` | `x`: fixed-point at radix `r` | `double` | Debug-only: `x / (double)(1 << r)`. Pulls in `libm` — compile it out of release builds. | @@ -157,13 +157,13 @@ so call sites read as intent: | `FR_MAX(a, b)` | Two values of the same type | The larger of the two | Evaluates each argument once. | | `FR_CLAMP(x, lo, hi)` | `x`: value; `lo`, `hi`: bounds | `x` clamped to `[lo, hi]` | Equivalent to `FR_MIN(FR_MAX(x, lo), hi)`. | | `FR_DIV(x, xr, y, yr)` | `x`: numerator at radix `xr`; `y`: denominator at radix `yr` | `s32` at radix `xr` | Pre-scales the numerator in a 64-bit intermediate and **rounds to nearest** (adds half the divisor before truncating, with correct sign handling). Worst-case error ≤ 0.5 LSB. Works correctly across the full Q16.16 range. | -| `FR_DIV_TRUNC(x, xr, y, yr)` | same as `FR_DIV` | `s32` at radix `xr` | `((s64)(x) << (yr)) / (s32)(y)`. Truncating division (rounds toward zero). This was the behaviour of `FR_DIV` in v2.0.0; use it when you need exact backward compatibility or when the truncation bias is acceptable. | +| `FR_DIV_TRUNC(x, xr, y, yr)` | same as `FR_DIV` | `s32` at radix `xr` | `((s64)(x) << (yr)) / (s32)(y)`. Truncating division (rounds toward zero). This was the behavior of `FR_DIV` in v2.0.0; use it when you need exact backward compatibility or when the truncation bias is acceptable. | | `FR_DIV32(x, xr, y, yr)` | same as `FR_DIV` | `s32` at radix `xr` | `((s32)(x) << (yr)) / (s32)(y)`. 32-bit-only truncating path — requires `|x| < 2^(31 − yr)` to avoid overflow in the intermediate shift. Use on tiny targets (PIC, AVR, 8051) where 64-bit ops pull in unwanted compiler runtime code. | | `FR_MOD(x, y)` | `x`, `y`: same radix | remainder at the same radix | `(x) % (y)`. Standard C remainder semantics. | ## Arithmetic -FR_Math splits arithmetic into three flavours. The +FR_Math splits arithmetic into three flavors. 
The **macros** (`FR_ADD`, `FR_SUB`) are mixed-radix, inline, and wrap on overflow. The **s.16 helper functions** (`FR_FixMuls`, @@ -346,7 +346,7 @@ Four shifts plus three adds — cheap on an 8051, AVR, or any hand-written DSP inner loop — and the answer has at most ±0.5 LSB of truncation error. The same discipline applies to the other direction: in `FR_DEG2BAM` the divide-by-360 is -a compile-time constant, so any optimising compiler folds it into a +a compile-time constant, so any optimizing compiler folds it into a multiply-by-reciprocal (or, on a weaker toolchain, a runtime call that you can inline yourself). diff --git a/docs/building.md b/docs/building.md index c9f5f21..da61b4f 100644 --- a/docs/building.md +++ b/docs/building.md @@ -101,7 +101,7 @@ binaries to keep compile times low: | `test_log_exp` | Log base 2 / ln / log10 and their inverses. | | `test_2d` | 2D transforms, determinants, inverses. | | `test_full_coverage` | Dark-corner cases: overflow sentinels, edge radixes, round-trips. | -| `test_tdd` | Characterisation tests pinned to bit-exact reference values. | +| `test_tdd` | Characterization tests pinned to bit-exact reference values. | As of v2.0.0 the suite contains **42 tests** across those binaries and covers **99%** of the library source. @@ -119,12 +119,12 @@ make test ### Running the TDD pins after a change -`test_tdd.cpp` is a characterisation suite. It records +`test_tdd.cpp` is a characterization suite. It records exact bit patterns for a sample of inputs and fails loudly if those -patterns drift. Any change that modifies the numerical behaviour of +patterns drift. Any change that modifies the numerical behavior of the library will break this suite — that's the point. -If you *intended* to change the numerical behaviour (e.g. +If you *intended* to change the numerical behavior (e.g. 
you improved a polynomial approximation), update the pinned values in `tests/test_tdd.cpp` and note the change in `release_notes.md` along with any updates to the diff --git a/docs/examples.md b/docs/examples.md index b7d6145..1716efd 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -425,7 +425,7 @@ The `XFormPtI16` fast path takes `s16` coordinates in and writes `s16` out. It's a tiny bit lossier than the `s32` form, but it sidesteps all the fixed-point conversion on the hot path — useful inside -the inner loop of a scanline rasteriser where you already know +the inner loop of a scanline rasterizer where you already know your coordinates fit in 16 bits. *Caveats:* the output is narrowed to `s16`, @@ -511,7 +511,7 @@ int main(void) FR_printNumF(buf_putc, val, 16, 0, 8); printf(" 16 16 0x%08x %s\n", (unsigned)val, buf); /* Expected: "3.14158630" — good through 5 digits, then - * quantisation noise appears. This is the sweet spot for + * quantization noise appears. This is the sweet spot for * most embedded work: 16 bits of fraction fits in an s32 * with 15 bits of integer range (±32767). */ } @@ -562,7 +562,7 @@ at radix 8 the value is `0x324` — only 10 significant bits — so the decimal rendering can only faithfully reproduce about two fractional digits. At radix 24 the value is `0x03243F6A` — 26 significant bits — and seven decimal digits survive. The -eighth digit (`5` vs `4`) shows the quantisation floor: `2^−24 ≈ +eighth digit (`5` vs `4`) shows the quantization floor: `2^−24 ≈ 6 × 10^−8`, so the last digit is always uncertain. ## See also diff --git a/docs/fixed-point-primer.md b/docs/fixed-point-primer.md index 4c127ad..ab79007 100644 --- a/docs/fixed-point-primer.md +++ b/docs/fixed-point-primer.md @@ -266,14 +266,14 @@ you want to think of an FR_Math value as a "number with a radix", think of the radix as a *type annotation that lives in your source code*, not a runtime field. 
-## Quantisation and loss of precision +## Quantization and loss of precision Fixing the radix also fixes the smallest representable fractional step. At radix *N*, that step is `2^−N` — nothing finer survives the round-trip into the integer. Any real value smaller than the step rounds to zero; any real value landing between two adjacent steps rounds to one of them. The difference between the ideal -value and its stored form is called **quantisation error**, and it +value and its stored form is called **quantization error**, and it is the main price paid for doing fractional math in integer registers. @@ -295,7 +295,7 @@ radix 16 and the picture changes: error = 0.00000153 (< 0.002 %) ``` -This behaviour isn't a bug — it is the same compromise IEEE-754 +This behavior isn't a bug — it is the same compromise IEEE-754 floating point makes with its mantissa. The difference is that a float hides the trade-off behind a variable exponent, while fixed-point puts it on a ledger that the programmer chooses up @@ -307,7 +307,7 @@ half the smallest step the application cares about. Any coarser and small signals vanish; any finer and integer headroom is being spent for no benefit. -A second consequence worth recording: quantisation error +A second consequence worth recording: quantization error *accumulates*. Summing a million low-radix values sums the errors too. Signal-processing pipelines with long feedback paths are the main reason to carry accumulators at a wider radix than the @@ -375,7 +375,7 @@ FR_Math ships this operation as callback `f`, which makes it usable on targets without stdio — a UART write, an LCD glyph pusher, a ring-buffer append. The `pad` parameter sets a minimum field width and `prec` sets the number of -fractional digits. Rounding behaviour matches the hand-rolled +fractional digits. Rounding behavior matches the hand-rolled version: excess fractional digits are truncated, and negative values are handled without the two's-complement trap described above. 
@@ -384,7 +384,7 @@ above. Once you've chosen a radix, the everyday operations behave almost like integer math — with one or two twists per -operation that you just have to internalise. Let's walk +operation that you just have to internalize. Let's walk through them. ### Addition and subtraction @@ -527,7 +527,7 @@ Three things to watch for: it explicitly before the divide. - **Rounding toward zero.** C's integer division truncates toward zero for both signs, so `−7 / 2 == −3` (not `−4`). Fixed-point - division inherits that behaviour. Round-to-nearest can be + division inherits that behavior. Round-to-nearest can be layered on top by adding `b / 2` (for a positive numerator) or `−b / 2` (for a negative numerator) to the pre-scaled numerator before the divide. @@ -557,7 +557,7 @@ for you: - Going to a *smaller* radix — the low bits are dropped. Precision is lost; headroom grows. This is a good place to add `± (1 << (from_r - to_r - 1))` - before the shift if you want round-to-nearest behaviour. + before the shift if you want round-to-nearest behavior. The value is conserved as closely as the destination radix can represent it. Nothing more, nothing less. @@ -620,7 +620,7 @@ and store the result back into a 32-bit register without thinking about it, you will eventually pass a pair of inputs whose product doesn't fit, and plain C will hand you wrap-around garbage with no warning. A signed 32-bit multiply that overflows is not a -runtime error in C — it's undefined behaviour that +runtime error in C — it's undefined behavior that happens to look like data most of the time. FR_Math defends against this in three layers, and it's @@ -711,12 +711,12 @@ you actually need 15 integer bits on that particular signal. ## A worked example: one-pole IIR low-pass filter The sections up to this point have introduced the pieces -individually: scaling, notation, quantisation, arithmetic, +individually: scaling, notation, quantization, arithmetic, overflow, and radix choice. 
A small end-to-end example is the fastest way to see how those pieces fit together on a real pipeline. The filter walked through below is a single-pole infinite-impulse-response (IIR) low-pass — about the simplest -entry in the DSP catalogue, but realistic enough to exercise +entry in the DSP catalog, but realistic enough to exercise nearly every decision the primer has covered so far. In floating point, the filter is one line of arithmetic: @@ -753,7 +753,7 @@ be picked: `x`, so it shares the same ±32767 output range. But because it accumulates small updates on every sample, it will drift and lose precision unless carried at a higher radix than the raw - input. This is the quantisation-error accumulation noted + input. This is the quantization-error accumulation noted earlier in the primer, showing up in practice. ### Step 2: pick the radixes @@ -841,7 +841,7 @@ feeds both versions a few thousand samples — a mix of sine tones, step inputs, and silence is enough to exercise the relevant paths — and reports the worst-case delta. For a radix-15 one-pole IIR the expected worst-case difference is on the order of a few LSB, -comparable to the inherent quantisation of the 16-bit output +comparable to the inherent quantization of the 16-bit output format and not audible in normal listening. Anything substantially larger indicates a radix choice that is too tight, a rounding mode that is drifting, or a missing int64 promotion on the @@ -917,12 +917,12 @@ Angles deserve their own section because FR_Math gives you angle into?** Because the `u16` wraparound *is* the angular modulus — that's the whole feature. Adding two `u16` BAM values automatically gives you the right answer modulo a full revolution, -with zero quantisation error at the boundary and no `% 65536` in +with zero quantization error at the boundary and no `% 65536` in sight. 
If BAM were `s32`, every read of the table would have to explicitly mask off the top bits (and handle negative values) before the quadrant extraction (`bam >> 14`) made any sense. You would have traded one free operation for two slow ones on every -sample, just to get back the same behaviour. So instead, the public +sample, just to get back the same behavior. So instead, the public trig entry points (`FR_CosI`, `FR_Cos`, `fr_cos`, and friends) *all* take signed angles — in degrees, fixed-radix degrees, or radians — and only the internal `fr_cos_bam` / `fr_sin_bam` diff --git a/docs/getting-started.md b/docs/getting-started.md index 54b7f17..9eac417 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -284,7 +284,7 @@ coverage across the library sources. conventions work. - **[API Reference](api-reference.md)** — per-symbol inputs, outputs, precision, and saturation - behaviour. + behavior. - **[Examples](examples.md)** — runnable snippets for common tasks. - **[Building & Testing](building.md)** diff --git a/docs/releases.md b/docs/releases.md index 277b811..6655693 100644 --- a/docs/releases.md +++ b/docs/releases.md @@ -4,6 +4,17 @@ Release highlights. For the full per-symbol change log, see [release_notes.md](https://github.com/deftio/fr_math/blob/master/release_notes.md) in the repo. +## v2.0.8 — 2026 + +Tangent accuracy rewrite and trig rounding fix. + +- **BAM-native tangent**: new `fr_tan_bam(u16 bam)` with 65-entry octant table (130 bytes). No 64-bit math. `FR_TanI`, `FR_Tan`, `fr_tan` are now thin wrappers. +- **Round-to-nearest fix**: radian/degree trig wrappers now round instead of truncating when converting to BAM. Peak error drops from ~1.03% to 0.16% on the radian path, matching BAM-native accuracy. +- **Conversion macro trimming**: `FR_DEG2BAM` and `FR_RAD2BAM` reduced to ~18-21 bits (from ~28 bits). Verified: no measurable accuracy impact. 
+- **`FR_TRIG_MINVAL` fixed**: now `-FR_TRIG_MAXVAL` (was `-FR_TRIG_MASK`) + +--- + ## v2.0.7 — 2026 README restructure, accuracy table cleanup, expanded cross-compile support. @@ -124,7 +135,7 @@ with v2.0.0 except where noted. - `FR_DIV(x, xr, y, yr)` — fixed-point division with 64-bit pre-scaling. Now **rounds to nearest** (≤ 0.5 LSB error) instead of truncating. `FR_DIV_TRUNC` preserves the old - truncating behaviour for backward compatibility. `FR_DIV32` is + truncating behavior for backward compatibility. `FR_DIV32` is the 32-bit-only truncating path. - `FR_MOD(x, xr, y, yr)` — fixed-point modulus. @@ -149,7 +160,7 @@ with v2.0.0 except where noted. | FR_atan signature | `(input, radix)` → s16 degrees | `(input, radix, out_radix)` → s32 radians | | FR_atan2 signature | `(y, x)` → s16 degrees | `(y, x, out_radix)` → s32 radians | | FR_BAM2RAD | off by 1024× (bug) | correct | -| FR_DIV rounding | truncates toward zero | rounds to nearest (use `FR_DIV_TRUNC` for old behaviour) | +| FR_DIV rounding | truncates toward zero | rounds to nearest (use `FR_DIV_TRUNC` for old behavior) | --- @@ -187,7 +198,7 @@ mandatory. dropped. - `FR_atan`, `FR_Tan`, `FR_TanI`: wiring and overflow fixes. -- `FR_printNumD/F/H`: fixed undefined behaviour on +- `FR_printNumD/F/H`: fixed undefined behavior on `INT_MIN` and a broken fraction extraction in the v1 code. - `FR_DEG2RAD` / `FR_RAD2DEG`: macro bodies @@ -205,7 +216,7 @@ mandatory. `FR_BAM2DEG`, `FR_RAD2BAM`, `FR_BAM2RAD`. BAM (16 bits per full circle) is the natural integer representation for phase accumulators and - gives zero quantisation at the wraparound. + gives zero quantization at the wraparound. - **Square root and hypot**: `FR_sqrt` uses a digit-by-digit integer isqrt on `int64_t`; `FR_hypot` computes `sqrt(x² + y²)` @@ -260,8 +271,8 @@ mandatory. 
### Test suite v2 ships with **42 tests** across six test binaries -and a characterisation suite (`test_tdd.cpp`) that pins -numerical behaviour to bit-exact reference values. Overall line +and a characterization suite (`test_tdd.cpp`) that pins +numerical behavior to bit-exact reference values. Overall line coverage is **99%** on the library sources. ## v1.0.3 — 2025 diff --git a/llms.txt b/llms.txt index a6d254f..7c0ce3e 100644 --- a/llms.txt +++ b/llms.txt @@ -27,7 +27,7 @@ Common radix choices: - `src/FR_math.h` — all public declarations, macros, constants - `src/FR_math.c` — all function implementations (~42KB) - `src/FR_defs.h` — type aliases: s8, s16, s32, u8, u16, u32 -- `src/FR_trig_table.h` — precomputed sine table (256 entries) +- `src/FR_trig_table.h` — precomputed cosine quadrant table (129 entries) + tangent octant table (65 entries) - `src/FR_math_2D.h` / `src/FR_math_2D.cpp` — optional 2D transform class (C++) ## Types @@ -82,6 +82,7 @@ s32 fr_tan(s32 rad, u16 radix); // BAM (Binary Angle Measurement) — u16 where 65536 = 360 degrees: s32 fr_cos_bam(u16 bam); s32 fr_sin_bam(u16 bam); +s32 fr_tan_bam(u16 bam); // Degree API at any radix: s32 FR_Cos(s16 deg, u16 radix); diff --git a/pages/guide/api-reference.html b/pages/guide/api-reference.html index 5c03c66..328d2f4 100644 --- a/pages/guide/api-reference.html +++ b/pages/guide/api-reference.html @@ -5,7 +5,7 @@ API Reference — FR_Math - + @@ -18,7 +18,7 @@

API Reference

Every public symbol, grouped by topic. Each entry lists the radix -convention, the precision, and the error / saturation behaviour. All +convention, the precision, and the error / saturation behavior. All types are from FR_defs.h: s8 s16 s32 s64 for signed and u8 u16 u32 u64 for unsigned integers (these are aliases for the <stdint.h> types).

@@ -29,7 +29,7 @@

Reading this reference

radix handling and precision separately, because in a mixed-radix library those four things are what actually lets you plan an arithmetic pipeline without hidden -quantisation. If you are new to fixed-point, the +quantization. If you are new to fixed-point, the Fixed-Point Primer explains the notation first; come back here once you’re comfortable reading s15.16 and s0.15.

@@ -162,7 +162,7 @@

Integer ↔ fixed-point

FR_INT(x, r) x: fixed-point at radix r integer - Truncates toward zero. FR_INT(-1, 4) == 0. Useful when you want C’s normal integer-cast behaviour. + Truncates toward zero. FR_INT(-1, 4) == 0. Useful when you want C’s normal integer-cast behavior. FR_NUM(i, f, d, r) @@ -316,7 +316,7 @@

Utility macros

FR_DIV_TRUNC(x, xr, y, yr) x: numerator at radix xr; y: denominator at radix yr ((s64)(x) << (yr)) / (s32)(y) - Truncating division (rounds toward zero). This was the behaviour of FR_DIV in v2.0.0; use it when you need exact backward compatibility or when the truncation bias is acceptable. + Truncating division (rounds toward zero). This was the behavior of FR_DIV in v2.0.0; use it when you need exact backward compatibility or when the truncation bias is acceptable. FR_DIV32(x, xr, y, yr) @@ -335,7 +335,7 @@

Utility macros

Arithmetic

-

FR_Math splits arithmetic into three flavours. The +

FR_Math splits arithmetic into three flavors. The macros (FR_ADD, FR_SUB) are mixed-radix, inline, and wrap on overflow. The s.16 helper functions (FR_FixMuls, @@ -564,7 +564,7 @@

Worked example: keeping precision on chips without a multiplier

hand-written DSP inner loop — and the answer has at most ±0.5 LSB of truncation error. The same discipline applies to the other direction: in FR_DEG2BAM the divide-by-360 is -a compile-time constant, so any optimising compiler folds it into a +a compile-time constant, so any optimizing compiler folds it into a multiply-by-reciprocal (or, on a weaker toolchain, a runtime call that you can inline yourself).

diff --git a/pages/guide/building.html b/pages/guide/building.html index 10b3739..246afbb 100644 --- a/pages/guide/building.html +++ b/pages/guide/building.html @@ -119,7 +119,7 @@

The test suite

 <tr><td>test_log_exp</td><td>Log base 2 / ln / log10 and their inverses.</td></tr>
 <tr><td>test_2d</td><td>2D transforms, determinants, inverses.</td></tr>
 <tr><td>test_full_coverage</td><td>Dark-corner cases: overflow sentinels, edge radixes, round-trips.</td></tr>
-<tr><td>test_tdd</td><td>Characterisation tests pinned to bit-exact reference values.</td></tr>
+<tr><td>test_tdd</td><td>Characterization tests pinned to bit-exact reference values.</td></tr>
@@ -137,12 +137,12 @@

Running a single binary

Running the TDD pins after a change

-

test_tdd.cpp is a characterisation suite. It records +

test_tdd.cpp is a characterization suite. It records exact bit patterns for a sample of inputs and fails loudly if those -patterns drift. Any change that modifies the numerical behaviour of +patterns drift. Any change that modifies the numerical behavior of the library will break this suite — that’s the point.

-

If you intended to change the numerical behaviour (e.g. +

If you intended to change the numerical behavior (e.g. you improved a polynomial approximation), update the pinned values in tests/test_tdd.cpp and note the change in release_notes.md along with any updates to the diff --git a/pages/guide/examples.html b/pages/guide/examples.html index 137525b..fec3f91 100644 --- a/pages/guide/examples.html +++ b/pages/guide/examples.html @@ -423,7 +423,7 @@

10. Integer-only 2D transform for scanline renderers

coordinates in and writes s16 out. It’s a tiny bit lossier than the s32 form, but it sidesteps all the fixed-point conversion on the hot path — useful inside -the inner loop of a scanline rasteriser where you already know +the inner loop of a scanline rasterizer where you already know your coordinates fit in 16 bits.

Caveats: the output is narrowed to s16, @@ -510,7 +510,7 @@

11. String round-trip and radix precision

FR_printNumF(buf_putc, val, 16, 0, 8); printf(" 16 16 0x%08x %s\n", (unsigned)val, buf); /* Expected: "3.14158630" — good through 5 digits, then - * quantisation noise appears. This is the sweet spot for + * quantization noise appears. This is the sweet spot for * most embedded work: 16 bits of fraction fits in an s32 * with 15 bits of integer range (±32767). */ } @@ -558,7 +558,7 @@

11. String round-trip and radix precision

so the decimal rendering can only faithfully reproduce about two fractional digits. At radix 24 the value is 0x03243F6A — 26 significant bits — and seven decimal digits survive. The -eighth digit (5 vs 4) shows the quantisation floor: +eighth digit (5 vs 4) shows the quantization floor: 2^−24 ≈ 6 × 10^−8, so the last digit is always uncertain.

diff --git a/pages/guide/fixed-point-primer.html b/pages/guide/fixed-point-primer.html index a4e71c4..ce41074 100644 --- a/pages/guide/fixed-point-primer.html +++ b/pages/guide/fixed-point-primer.html @@ -294,7 +294,7 @@

Notation: sM.N and the radix

radix”, think of the radix as a type annotation that lives in your source code, not a runtime field.

-

Quantisation and loss of precision

+

Quantization and loss of precision

Fixing the radix also fixes the smallest representable fractional step. At radix N, that step is @@ -302,7 +302,7 @@

Quantisation and loss of precision

the round-trip into the integer. Any real value smaller than the step rounds to zero; any real value landing between two adjacent steps rounds to one of them. The difference between the ideal -value and its stored form is called quantisation +value and its stored form is called quantization error, and it is the main price paid for doing fractional math in integer registers.

@@ -323,7 +323,7 @@

Quantisation and loss of precision

error = 0.00000153 (< 0.002 %) -

This behaviour isn’t a bug — it is the same +

This behavior isn’t a bug — it is the same compromise IEEE-754 floating point makes with its mantissa. The difference is that a float hides the trade-off behind a variable exponent, while fixed-point puts it on a ledger that the @@ -336,7 +336,7 @@

Quantisation and loss of precision

vanish; any finer and integer headroom is being spent for no benefit.

-

A second consequence worth recording: quantisation error +

A second consequence worth recording: quantization error accumulates. Summing a million low-radix values sums the errors too. Signal-processing pipelines with long feedback paths are the main reason to carry accumulators at a wider radix @@ -406,7 +406,7 @@

Displaying a fixed-point value

usable on targets without stdio — a UART write, an LCD glyph pusher, a ring-buffer append. The pad parameter sets a minimum field width and prec sets -the number of fractional digits. Rounding behaviour matches the +the number of fractional digits. Rounding behavior matches the hand-rolled version: excess fractional digits are truncated, and negative values are handled without the two’s-complement trap described above.

@@ -415,7 +415,7 @@

Arithmetic: what the operations actually do

Once you’ve chosen a radix, the everyday operations behave almost like integer math — with one or two twists per -operation that you just have to internalise. Let’s walk +operation that you just have to internalize. Let’s walk through them.

Addition and subtraction

@@ -489,7 +489,7 @@

Multiplication

doesn’t fire. Rounds to nearest — adds 0.5 LSB before the shift.
  • FR_FixMulSat(a, b, r) — same shape with - the same round-to-nearest behaviour, but also saturates to + the same round-to-nearest behavior, but also saturates to FR_OVERFLOW_POS / FR_OVERFLOW_NEG if the result wouldn’t fit. Prefer this one by default unless you’ve proven @@ -555,7 +555,7 @@

    Division

    division truncates toward zero for both signs, so −7 / 2 == −3 (not −4). Fixed-point division inherits - that behaviour. Round-to-nearest can be layered on top by + that behavior. Round-to-nearest can be layered on top by adding b / 2 (for a positive numerator) or −b / 2 (for a negative numerator) to the pre-scaled numerator before the divide.
  • @@ -581,7 +581,7 @@

    Changing radix

  • Going to a smaller radix — the low bits are dropped. Precision is lost; headroom grows. This is a good place to add ± (1 << (from_r - to_r - 1)) - before the shift if you want round-to-nearest behaviour.
  • + before the shift if you want round-to-nearest behavior.

    The value is conserved as closely as the destination radix can @@ -644,7 +644,7 @@

    Overflow, saturation, and the sentinels

    about it, you will eventually pass a pair of inputs whose product doesn’t fit, and plain C will hand you wrap-around garbage with no warning. A signed 32-bit multiply that overflows is not a -runtime error in C — it’s undefined behaviour that +runtime error in C — it’s undefined behavior that happens to look like data most of the time.

    FR_Math defends against this in three layers, and it’s @@ -743,12 +743,12 @@

    Choosing a radix

    A worked example: one-pole IIR low-pass filter

    The sections up to this point have introduced the pieces -individually: scaling, notation, quantisation, arithmetic, +individually: scaling, notation, quantization, arithmetic, overflow, and radix choice. A small end-to-end example is the fastest way to see how those pieces fit together on a real pipeline. The filter walked through below is a single-pole infinite-impulse-response (IIR) low-pass — about the -simplest entry in the DSP catalogue, but realistic enough to +simplest entry in the DSP catalog, but realistic enough to exercise nearly every decision the primer has covered so far.

    In floating point, the filter is one line of arithmetic:

    @@ -790,7 +790,7 @@

    Step 1: inventory the ranges

    ±32767 output range. But because it accumulates small updates on every sample, it will drift and lose precision unless carried at a higher radix than the raw - input. This is the quantisation-error accumulation noted + input. This is the quantization-error accumulation noted earlier in the primer, showing up in practice. @@ -889,7 +889,7 @@

    Step 5: test against the reference

    exercise the relevant paths — and reports the worst-case delta. For a radix-15 one-pole IIR the expected worst-case difference is on the order of a few LSB, comparable to the -inherent quantisation of the 16-bit output format and not +inherent quantization of the 16-bit output format and not audible in normal listening. Anything substantially larger indicates a radix choice that is too tight, a rounding mode that is drifting, or a missing int64 promotion on the @@ -973,13 +973,13 @@

    Angle representations

    u16 wraparound is the angular modulus — that’s the whole feature. Adding two u16 BAM values automatically gives you the right answer modulo a full -revolution, with zero quantisation error at the boundary and no +revolution, with zero quantization error at the boundary and no % 65536 in sight. If BAM were s32, every read of the table would have to explicitly mask off the top bits (and handle negative values) before the quadrant extraction (bam >> 14) made any sense. You would have traded one free operation for two slow ones on every sample, just to get -back the same behaviour. So instead, the public trig entry points +back the same behavior. So instead, the public trig entry points (FR_CosI, FR_Cos, fr_cos, and friends) all take signed angles — in degrees, fixed-radix degrees, or radians — and only the internal diff --git a/pages/guide/getting-started.html b/pages/guide/getting-started.html index b6a22ed..ee8f5df 100644 --- a/pages/guide/getting-started.html +++ b/pages/guide/getting-started.html @@ -297,7 +297,7 @@

    Next steps

    conventions work.
  • API Reference — per-symbol inputs, outputs, precision, and saturation - behaviour.
  • + behavior.
  • Examples — runnable snippets for common tasks.
  • Building & Testing diff --git a/pages/index.html b/pages/index.html index ac84759..62e6b7a 100644 --- a/pages/index.html +++ b/pages/index.html @@ -34,7 +34,7 @@

    FR_Math

    Tested on gcc, clang, MSVC, IAR, Keil, sdcc, AVR-gcc, MSP430-gcc, RISC-V toolchains, and Arduino.
  • Zero dependencies beyond <stdint.h>.
  • -
  • Parameterised radix: every function takes the binary point as an +
  • Parameterized radix: every function takes the binary point as an argument, so you choose how many fractional bits you need per call.
  • Deterministic, bounded error — every public symbol has a @@ -50,27 +50,31 @@

    Measured accuracy

    report for sweeps at radixes 8, 12, 16, and 24. Percent errors skip expected values near zero (|expected| < 0.01).

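-<table>
-<thead><tr><th>Function</th><th>Max err (%)</th><th>Avg err (%)</th><th>Note</th></tr></thead>
-<tbody>
-<tr><td>sin / cos</td><td>0.7169</td><td>0.0100</td><td>65536-pt sweep + specials</td></tr>
-<tr><td>tan</td><td>0.7118</td><td>0.0162</td><td>65536-pt sweep (skip poles)</td></tr>
-<tr><td>asin / acos</td><td>0.7025</td><td>0.0105</td><td>65536-pt; sqrt approx near boundary</td></tr>
-<tr><td>atan2</td><td>0.4953</td><td>0.0268</td><td>65536x5 radii; asin/acos+hypot_fast8</td></tr>
-<tr><td>atan</td><td>0.2985</td><td>0.0159</td><td>20001-pt sweep [-10,10]; via FR_atan2</td></tr>
-<tr><td>sqrt</td><td>0.0003</td><td>0.0000</td><td>Round-to-nearest</td></tr>
-<tr><td>log2</td><td>0.2479</td><td>0.0045</td><td>65-entry mantissa table</td></tr>
-<tr><td>pow2</td><td>0.1373</td><td>0.0057</td><td>65-entry fraction table</td></tr>
-<tr><td>ln, log10</td><td>0.0015</td><td>0.0004</td><td>Via FR_MULK28 from log2</td></tr>
-<tr><td>exp</td><td>0.0719</td><td>0.0051</td><td>FR_MULK28 + FR_pow2</td></tr>
-<tr><td>exp_fast</td><td>0.0719</td><td>0.0064</td><td>Shift-only scaling</td></tr>
-<tr><td>pow10</td><td>0.1163</td><td>0.0075</td><td>FR_MULK28 + FR_pow2</td></tr>
-<tr><td>pow10_fast</td><td>0.1163</td><td>0.0100</td><td>Shift-only scaling</td></tr>
-<tr><td>hypot (exact)</td><td>0.0001</td><td>0.0000</td><td>64-bit intermediate</td></tr>
-<tr><td>hypot_fast8 (8-seg)</td><td>0.0977</td><td>0.0508</td><td>Shift-only, no multiply</td></tr>
-</tbody>
-</table>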
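+<table>
+<thead><tr><th>Function</th><th>Max err (%)</th><th>Avg err (%)</th><th>Note</th></tr></thead>
+<tbody>
+<tr><td>sin/cos (BAM)</td><td>0.1646</td><td>0.0058</td><td>65536 BAM; 129-entry quadrant table</td></tr>
+<tr><td>sin/cos (deg)</td><td>0.5909</td><td>0.0091</td><td>65536-pt deg r7 + specials</td></tr>
+<tr><td>sin/cos (rad)</td><td>0.1646</td><td>0.0059</td><td>65536-pt rad r16</td></tr>
+<tr><td>tan (BAM)</td><td>0.1704</td><td>0.0065</td><td>65536 BAM; 65-entry octant table</td></tr>
+<tr><td>tan (deg)</td><td>0.6000</td><td>0.0140</td><td>65536-pt deg r7 + specials</td></tr>
+<tr><td>tan (rad)</td><td>0.1704</td><td>0.0065</td><td>65536-pt rad r16</td></tr>
+<tr><td>asin / acos</td><td>1.9776</td><td>0.0308</td><td>65536-pt; sqrt approx near boundary</td></tr>
+<tr><td>atan2</td><td>0.4953</td><td>0.0238</td><td>65536x5 radii; asin/acos+hypot_fast8</td></tr>
+<tr><td>atan</td><td>0.2985</td><td>0.0153</td><td>20001-pt sweep [-10,10]; via FR_atan2</td></tr>
+<tr><td>sqrt</td><td>0.0003</td><td>0.0000</td><td>Round-to-nearest</td></tr>
+<tr><td>log2</td><td>0.2479</td><td>0.0045</td><td>65-entry mantissa table</td></tr>
+<tr><td>pow2</td><td>0.1373</td><td>0.0057</td><td>65-entry fraction table</td></tr>
+<tr><td>ln, log10</td><td>0.0015</td><td>0.0004</td><td>Via FR_MULK28 from log2</td></tr>
+<tr><td>exp</td><td>0.0719</td><td>0.0051</td><td>FR_MULK28 + FR_pow2</td></tr>
+<tr><td>exp_fast</td><td>0.0719</td><td>0.0064</td><td>Shift-only scaling</td></tr>
+<tr><td>pow10</td><td>0.1163</td><td>0.0075</td><td>FR_MULK28 + FR_pow2</td></tr>
+<tr><td>pow10_fast</td><td>0.1163</td><td>0.0100</td><td>Shift-only scaling</td></tr>
+<tr><td>hypot (exact)</td><td>0.0001</td><td>0.0000</td><td>64-bit intermediate</td></tr>
+<tr><td>hypot_fast8 (8-seg)</td><td>0.0977</td><td>0.0508</td><td>Shift-only, no multiply</td></tr>
+</tbody>
+</table>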

    What’s in the box

    @@ -81,7 +85,7 @@

    What’s in the box

 <tr><td>Arithmetic</td><td>FR_ADD, FR_SUB, FR_DIV, FR_DIV32, FR_MOD, FR_FixMuls, FR_FixMulSat, FR_CHRDX</td></tr>
 <tr><td>Utility</td><td>FR_MIN, FR_MAX, FR_CLAMP, FR_ABS, FR_SGN</td></tr>
 <tr><td>Trig (integer deg)</td><td>FR_Sin, FR_Cos, FR_Tan, FR_SinI, FR_CosI, FR_TanI</td></tr>
-<tr><td>Trig (radian/BAM)</td><td>fr_sin, fr_cos, fr_tan, fr_sin_bam, fr_cos_bam, fr_sin_deg, fr_cos_deg</td></tr>
+<tr><td>Trig (radian/BAM)</td><td>fr_sin, fr_cos, fr_tan, fr_sin_bam, fr_cos_bam, fr_tan_bam, fr_sin_deg, fr_cos_deg</td></tr>
 <tr><td>Inverse trig</td><td>FR_atan, FR_atan2, FR_asin, FR_acos</td></tr>
 <tr><td>Log / exp</td><td>FR_log2, FR_ln, FR_log10, FR_pow2, FR_EXP, FR_POW10, FR_EXP_FAST, FR_POW10_FAST, FR_MULK28</td></tr>
 <tr><td>Roots</td><td>FR_sqrt, FR_hypot, FR_hypot_fast8</td></tr>
@@ -135,7 +139,7 @@

    Why fixed-point, in 2026?

    FPU does not exist and even software float is too slow or too large.
  • Hot inner loops on any CPU where a - parameterised-radix integer multiply is faster and more + parameterized-radix integer multiply is faster and more deterministic than a float. Think DSP taps, PID loops, coordinate transforms inside a scanline renderer.
  • Bit-exact reproducibility across compilers, diff --git a/pages/releases.html b/pages/releases.html index 337035a..e5ec364 100644 --- a/pages/releases.html +++ b/pages/releases.html @@ -159,7 +159,7 @@

    New utility macros

  • FR_DIV(x, xr, y, yr) — fixed-point division with 64-bit pre-scaling. Now rounds to nearest (≤ 0.5 LSB error) instead of truncating. - FR_DIV_TRUNC preserves the old truncating behaviour + FR_DIV_TRUNC preserves the old truncating behavior for backward compatibility. FR_DIV32 is the 32-bit-only truncating path.
  • FR_MOD(x, xr, y, yr) — fixed-point modulus.
  • @@ -190,7 +190,7 @@

    Breaking changes from v2.0.0

    FR_atan signature(input, radix) → s16 degrees(input, radix, out_radix) → s32 radians FR_atan2 signature(y, x) → s16 degrees(y, x, out_radix) → s32 radians FR_BAM2RADoff by 1024× (bug)correct -FR_DIV roundingtruncates toward zerorounds to nearest (use FR_DIV_TRUNC for old behaviour) +FR_DIV roundingtruncates toward zerorounds to nearest (use FR_DIV_TRUNC for old behavior) @@ -231,7 +231,7 @@

    Numerical fixes

    dropped.
  • FR_atan, FR_Tan, FR_TanI: wiring and overflow fixes.
  • -
  • FR_printNumD/F/H: fixed undefined behaviour on +
  • FR_printNumD/F/H: fixed undefined behavior on INT_MIN and a broken fraction extraction in the v1 code.
  • FR_DEG2RAD / FR_RAD2DEG: macro bodies @@ -251,7 +251,7 @@

    New functionality

    FR_BAM2DEG, FR_RAD2BAM, FR_BAM2RAD. BAM (16 bits per full circle) is the natural integer representation for phase accumulators and - gives zero quantisation at the wraparound.
  • + gives zero quantization at the wraparound.
  • Square root and hypot: FR_sqrt uses a digit-by-digit integer isqrt on int64_t; FR_hypot computes sqrt(x² + y²) @@ -313,8 +313,8 @@

    Breaking changes

    Test suite

    v2 ships with 42 tests across six test binaries -and a characterisation suite (test_tdd.cpp) that pins -numerical behaviour to bit-exact reference values. Overall line +and a characterization suite (test_tdd.cpp) that pins +numerical behavior to bit-exact reference values. Overall line coverage is 99% on the library sources.

    v1.0.3 — 2025

    diff --git a/release_notes.md b/release_notes.md index 8a5f3bb..f5e1ef0 100644 --- a/release_notes.md +++ b/release_notes.md @@ -1,5 +1,51 @@ # FR_Math Release Notes +## Version 2.0.8 (2026) + +Tangent accuracy rewrite and trig rounding fix. + +### BAM-native tangent table + +- **New `fr_tan_bam(u16 bam)`** function with a dedicated 65-entry octant + lookup table (`gFR_TAN_TAB_O` in `FR_trig_table.h`, 130 bytes ROM). + First octant uses direct table + lerp; second octant uses the + reciprocal identity `tan(x) = 1/tan(90-x)` with one 32-bit division. + No 64-bit intermediates anywhere in the tan path. +- **`FR_TanI`, `FR_Tan`, `fr_tan`** are now thin wrappers over + `fr_tan_bam`. The old sin/cos division implementation is removed. +- Peak error: 0.17% (BAM), 0.60% (deg r7), 0.17% (rad r16). + +### Round-to-nearest fix for radian/degree wrappers + +- `fr_cos`, `fr_sin`, `fr_tan`, `FR_Cos`, `FR_Sin`, `FR_Tan` now add + 0.5 LSB (`1 << (radix-1)`) before the `>> radix` shift when converting + from radians/degrees to BAM. This rounds to the nearest BAM value + instead of truncating, eliminating a systematic 1-BAM rounding error + that caused ~1% peak error near zero crossings. +- Radian-path sin/cos/tan now match BAM-native accuracy (0.16-0.17% + peak, was ~1.03%). + +### Conversion macro trimming + +- `FR_DEG2BAM`: 10 terms (~28 bits) reduced to 7 terms (~18 bits) +- `FR_RAD2BAM`: 9 terms (~27 bits) reduced to 7 terms (~21 bits) +- `FR_DEG2RAD`: 3 terms (~13 bits) extended to 5 terms (~17 bits) +- 18 bits of precision gives 4 bits of headroom over the 14-bit + effective BAM resolution of the trig tables. Verified: reverting to + the old full-precision macros changes sin/cos peak error by <0.04%. + +### Other + +- `FR_TRIG_MINVAL` fixed: was `-FR_TRIG_MASK` (-65535), now + `-FR_TRIG_MAXVAL` (-2147483647) to properly pair with `FR_TRIG_MAXVAL` + for tan saturation clamping. 
+- Accuracy table in all docs now shows separate BAM/deg/rad rows for + sin/cos and tan, matching the TDD characterization report. +- `fr_tan_bam` added to function listings across README, docs, HTML + pages, and llms.txt. + +--- + ## Version 2.0.7 (2026) README restructure, accuracy table cleanup, and expanded cross-compile support. diff --git a/src/FR_math.c b/src/FR_math.c index 181972e..dce131b 100644 --- a/src/FR_math.c +++ b/src/FR_math.c @@ -86,8 +86,16 @@ s32 fr_cos_bam(u16 bam) d = lo - hi; /* >= 0: cos monotonic */ v = lo - (((d * (s32)frac) + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS); - /* Shift s0.15 → s15.16 */ - v <<= 1; + if (v < 0x40) { + /* Near zero crossing: redo interpolation with 3 extra bits of + * precision to reduce rounding error when the result is small. */ + s32 lo3 = (s32)gFR_COS_TAB_Q[idx] << 3; + s32 d3 = lo3 - ((s32)gFR_COS_TAB_Q[idx + 1] << 3); + v = lo3 - (((d3 * (s32)frac) + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS); + v = (v + 2) >> 2; /* s0.18 → s15.16 with rounding */ + } else { + v <<= 1; /* s0.15 → s15.16 */ + } return (q == 1 || q == 2) ? -v : v; } @@ -100,19 +108,6 @@ s32 fr_sin_bam(u16 bam) return fr_cos_bam((u16)(bam - FR_BAM_QUADRANT)); } -/* Convert radians at given radix to BAM with rounding. - * One radian = 65536 / (2*pi) ≈ 10430.378 BAM units. - * We use the more precise scaled constant 10430378 / 1000 to keep error - * bounded across a wide range of radians. - */ -static u16 fr_rad_to_bam(s32 rad, u16 radix) -{ - int64_t scaled = ((int64_t)rad * 10430378LL) / 1000; - if (radix > 0) - scaled >>= radix; - return (u16)((u32)scaled & 0xffff); -} - s32 fr_cos(s32 rad, u16 radix) { return fr_cos_bam(fr_rad_to_bam(rad, radix)); @@ -123,70 +118,131 @@ s32 fr_sin(s32 rad, u16 radix) return fr_sin_bam(fr_rad_to_bam(rad, radix)); } -/* fr_tan: returns sin/cos at s15.16 (radix 16). Saturates if cos is near zero. 
*/ -s32 fr_tan(s32 rad, u16 radix) -{ - u16 bam = fr_rad_to_bam(rad, radix); - s32 s = fr_sin_bam(bam); - s32 c = fr_cos_bam(bam); - if (c == 0) - return (s >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; - return (s32)(((int64_t)s << FR_TRIG_OUT_PREC) / c); -} - /*======================================================= - * Integer-degree and fixed-radix-degree trig wrappers + * BAM-native tangent: fr_tan_bam * - * FR_CosI / FR_SinI are macros in the header (zero cost). The fixed-radix - * variants here convert s.r degrees to BAM in one shot using a precomputed - * reciprocal of 360 to avoid division on multiply-poor cores like 8051. + * Uses a 65-entry octant table (gFR_TAN_TAB_O) for the first octant + * [0, 45°] and the reciprocal identity tan(x) = 1/tan(90°-x) for the + * second octant (45°, 90°). Result is s15.16 with saturation at the + * poles. * - * Math: bam = deg * (65536 / 360) = deg * 182.0444... - * In s.16 fixed point: 65536 / 360 = 0xB60B (rounded). So - * bam_u16 = (deg_s.r * 0xB60B) >> r - * gives bam in u16 BAM units. The constant 0xB60B contains the divide by - * 360 baked in; the shift `>> r` strips the input radix. + * No 64-bit intermediates. One 32-bit division only in the >45° path. */ -static u16 fr_deg_radix_to_bam(s16 deg, u16 radix) +s32 fr_tan_bam(u16 bam) { - /* 0xB60B ≈ (65536/360) * 256 — the ×256 prescale keeps 32-bit math - * friendly to 8051-class MCUs. We must shift out both the input - * fraction bits (radix) AND the 8-bit prescale, hence radix + 8. - * The +half term rounds to nearest, matching FR_DEG2BAM behaviour. 
- */ - s32 v = (s32)deg * 0xB60BL; - u16 shift = radix + 8; - return (u16)((u32)((v + (1L << (shift - 1))) >> shift) & 0xffff); + u32 q = ((u32)bam >> 14) & 0x3; /* quadrant (top 2 bits) */ + u32 inq = (u32)bam & 0x3FFFu; /* in-quadrant (14 bits) */ + s32 sign = 1; + u32 idx, frac; + s32 lo, hi, raw; + + /* Exact zeros: bam lands exactly on 0° or 180° */ + if (inq == 0 && (q == 0 || q == 2)) + return 0; + + /* Poles: bam lands exactly on 90° or 270° */ + if (inq == 0 && (q == 1 || q == 3)) + return (q == 1) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + + /* Q1 (90°..180°) and Q3 (270°..360°): reflect and negate */ + if (q == 1 || q == 3) { + inq = 0x4000u - inq; + sign = -1; + } + + /* Now inq is in (0, 0x4000) = (0°, 90°) exclusive. + * Split into first octant [0, 45°) and second octant [45°, 90°). */ + if (inq < FR_TAN_OCTANT) { + /* First octant: direct table lookup + lerp. + * inq is 13 bits; top FR_TAN_TABLE_BITS index the table, + * bottom FR_TAN_FRAC_BITS drive interpolation. */ + idx = inq >> FR_TAN_FRAC_BITS; + frac = inq & FR_TAN_FRAC_MASK; + lo = (s32)gFR_TAN_TAB_O[idx]; + hi = (s32)gFR_TAN_TAB_O[idx + 1]; + raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + /* raw is in u0.15. Shift to s15.16. */ + raw <<= 1; + } else { + /* Second octant: tan(x) = 1 / tan(90° - x). + * complement is in (0, 0x2000] = (0°, 45°]. */ + u32 comp = 0x4000u - inq; + + /* Look up tan(complement) from the table */ + idx = comp >> FR_TAN_FRAC_BITS; + frac = comp & FR_TAN_FRAC_MASK; + lo = (s32)gFR_TAN_TAB_O[idx]; + hi = (s32)gFR_TAN_TAB_O[idx + 1]; + raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + + /* raw is tan(complement) in u0.15. Compute 1/raw in s15.16. + * 1.0 in s15.16 = 0x10000. We want (1<<16) / (raw_in_0.15) + * = (1<<16) * (1<<15) / raw_raw = (1<<31) / raw. + * Use unsigned to avoid overflow: 0x80000000 / raw. 
*/ + if (raw < 2) { + /* Near pole: saturate */ + raw = FR_TRIG_MAXVAL; + } else { + raw = (s32)(0x80000000u / (u32)raw); + } + } + + return (sign < 0) ? -raw : raw; +} + +/* fr_tan: returns tan at s15.16 (radix 16). Uses BAM-native table. + * At exact poles, fr_tan_bam's sign convention is based on BAM quadrant + * which loses the original approach direction. Fix up: if the result + * saturates, the sign should match the sign of the radian input. */ +s32 fr_tan(s32 rad, u16 radix) +{ + s32 result = fr_tan_bam(fr_rad_to_bam(rad, radix)); + if (result == FR_TRIG_MAXVAL && rad < 0) + return -FR_TRIG_MAXVAL; + if (result == -FR_TRIG_MAXVAL && rad > 0) + return FR_TRIG_MAXVAL; + return result; } -s32 FR_Cos(s16 deg, u16 radix) +/*======================================================= + * Integer-degree and fixed-radix-degree trig wrappers + */ +s32 FR_Cos(s32 deg, u16 radix) { - return fr_cos_bam(fr_deg_radix_to_bam(deg, radix)); + u16 bam = (radix == 0) ? FR_DEG2BAM_I(deg) : (u16)((FR_DEG2BAM(deg) + (1 << (radix - 1))) >> radix); + return fr_cos_bam(bam); } -s32 FR_Sin(s16 deg, u16 radix) +s32 FR_Sin(s32 deg, u16 radix) { - return fr_sin_bam(fr_deg_radix_to_bam(deg, radix)); + u16 bam = (radix == 0) ? FR_DEG2BAM_I(deg) : (u16)((FR_DEG2BAM(deg) + (1 << (radix - 1))) >> radix); + return fr_sin_bam(bam); } -s32 FR_TanI(s16 deg) +s32 FR_TanI(s32 deg) { - u16 bam = FR_DEG2BAM(deg); - s32 s = fr_sin_bam(bam); - s32 c = fr_cos_bam(bam); - if (c == 0) - return (s >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; - return (s32)(((int64_t)s << FR_TRIG_OUT_PREC) / c); + /* Exact pole: deg mod 180 == ±90. Sign matches input sign + * (positive deg → +MAXVAL, negative deg → -MAXVAL). */ + s32 rem = deg % 180; + if (rem == 90 || rem == -90) + return (deg > 0) ? 
FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + return fr_tan_bam(FR_DEG2BAM_I(deg)); } -s32 FR_Tan(s16 deg, u16 radix) +s32 FR_Tan(s32 deg, u16 radix) { - u16 bam = fr_deg_radix_to_bam(deg, radix); - s32 s = fr_sin_bam(bam); - s32 c = fr_cos_bam(bam); - if (c == 0) - return (s >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; - return (s32)(((int64_t)s << FR_TRIG_OUT_PREC) / c); + /* Check for exact integer poles before using the shift-only DEG2BAM + * macro, which can map to the wrong BAM quadrant for large angles. + * Only trigger when fractional bits are zero (exact pole). */ + s32 frac_mask = (1 << radix) - 1; + if ((deg & frac_mask) == 0) { + s32 deg_int = deg >> radix; + s32 rem = deg_int % 180; + if (rem == 90 || rem == -90) + return (deg >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + } + u16 bam = (radix == 0) ? FR_DEG2BAM_I(deg) : (u16)((FR_DEG2BAM(deg) + (1 << (radix - 1))) >> radix); + return fr_tan_bam(bam); } /*======================================================= @@ -275,7 +331,7 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix) { s32 one = (s32)1 << radix; if (input_abs >= one) - return sign ? FR_BAM2RAD(FR_BAM_HALF, out_radix) : 0; + return sign ? 
FR_CHRDX(FR_kPI, FR_kPREC, out_radix) : 0; } v = FR_CHRDX(input_abs, radix, FR_TRIG_PREC); /* |input| at s0.15 */ @@ -297,7 +353,7 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix) s32 rad_native = FR_sqrt(two_omx, radix); /* radians at caller radix */ s32 rad_out = FR_CHRDX(rad_native, radix, out_radix); if (sign) - rad_out = FR_BAM2RAD(FR_BAM_HALF, out_radix) - rad_out; + rad_out = FR_CHRDX(FR_kPI, FR_kPREC, out_radix) - rad_out; return rad_out; } @@ -362,7 +418,7 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix) u16 bam = (u16)(((u32)idx << FR_TRIG_FRAC_BITS) + (u32)frac); if (sign) bam = (u16)(FR_BAM_HALF - bam); /* mirror: pi - angle */ - return FR_BAM2RAD(bam, out_radix); + return FR_CHRDX(FR_Q2RAD(bam), 14, out_radix); } } @@ -370,7 +426,7 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix) s32 FR_asin(s32 input, u16 radix, u16 out_radix) { /* asin(x) = pi/2 - acos(x) */ - s32 half_pi = FR_BAM2RAD(FR_BAM_QUADRANT, out_radix); + s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); return half_pi - FR_acos(input, radix, out_radix); } @@ -394,12 +450,12 @@ s32 FR_atan2(s32 y, s32 x, u16 out_radix) /* Axis cases — exact angles, no divide. */ if (x == 0) { - if (y > 0) return FR_BAM2RAD(FR_BAM_QUADRANT, out_radix); /* pi/2 */ - if (y < 0) return -FR_BAM2RAD(FR_BAM_QUADRANT, out_radix); /* -pi/2 */ + if (y > 0) return FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); /* pi/2 */ + if (y < 0) return -FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); /* -pi/2 */ return 0; } if (y == 0) - return (x > 0) ? 0 : FR_BAM2RAD(FR_BAM_HALF, out_radix); /* 0 or pi */ + return (x > 0) ? 0 : FR_CHRDX(FR_kPI, FR_kPREC, out_radix); /* 0 or pi */ ax = (x < 0) ? -x : x; ay = (y < 0) ? 
-y : y; @@ -443,7 +499,7 @@ s32 FR_atan2(s32 y, s32 x, u16 out_radix) if (cos_val < FR_ATAN2_SMALL) { /* angle ≈ pi/2 - cos_val (symmetric small-angle identity) */ - s32 half_pi = FR_BAM2RAD(FR_BAM_QUADRANT, out_radix); + s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); q1_angle = half_pi - FR_CHRDX(cos_val, FR_TRIG_PREC, out_radix); } else @@ -453,7 +509,7 @@ s32 FR_atan2(s32 y, s32 x, u16 out_radix) /* Apply quadrant from signs of x and y. * q1_angle is always positive [0..pi/2]. */ { - s32 pi = FR_BAM2RAD(FR_BAM_HALF, out_radix); + s32 pi = FR_CHRDX(FR_kPI, FR_kPREC, out_radix); if (x > 0) return (y > 0) ? q1_angle : -q1_angle; /* x < 0: mirror across y-axis */ diff --git a/src/FR_math.h b/src/FR_math.h index 6eff284..a251316 100644 --- a/src/FR_math.h +++ b/src/FR_math.h @@ -270,8 +270,8 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { #define FR_TRIG_OUT_PREC (16) #define FR_TRIG_MASK ((1 << (FR_TRIG_PREC)) - 1) #define FR_TRIG_ONE (1L << FR_TRIG_OUT_PREC) /* 65536 = 1.0 */ -#define FR_TRIG_MAXVAL ((s32)0x7fffffff) /* tan saturation */ -#define FR_TRIG_MINVAL (-FR_TRIG_MASK) +#define FR_TRIG_MAXVAL ((s32)0x7fffffff) /* tan saturation max */ +#define FR_TRIG_MINVAL (-FR_TRIG_MAXVAL) /* tan saturation min */ /* Bit Shift Scaling macros. Useful on some platforms with poor MUL performance. 
* Also can be useful if you need to scale numbers with @@ -304,32 +304,108 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { /* scale by log2(10) 3.32192809489 used for converting pow2() to pow10 */ #define FR_SLOG2_10(x) (((x) << 1) + (x) + ((x) >> 2) + ((x) >> 4) + ((x) >> 7) + ((x) >> 10) + ((x) >> 11) + ((x) >> 13)) -/* TRIG Conversion macros - * Convert degrees <--> radians <--> quadrants <--> degrees - * no multiply (may reduce chances of overflow in certain circumstances) - * works on all int types and radixes (pure ints will have trunc err) - * radians = 2*pi per revolution - * degrees = 360 per revolution - * quadrants = 4 per revolution - * freq = 1 per revolution - */ -/* FR_DEG2RAD(x): multiply by pi/180 ≈ 0.017453 using shifts only. - * Worst-case relative error: ~1.6e-4 (acceptable for embedded use; if you - * need better precision, multiply by FR_kDEG2RAD and shift down by FR_kPREC). - * Side-effect note: x is referenced 3 times, so do not pass an expression - * with side effects. +/* Shift-only angular conversion macros + * + * All are pure constant multipliers expressed as shifts — no multiply, no + * divide, no 64-bit intermediates, no accumulators. Work at any radix: if + * your input is degrees at radix 8, the output is the target unit at radix 8. + * The caller shifts as needed. + * + * Angular units: + * degrees = 360 per revolution + * radians = 2*pi per revolution + * BAM = 65536 per revolution (Binary Angular Measure, u16) + * quadrants = 4 per revolution (= BAM >> 14) + * + * Side-effect note: x is referenced multiple times in each macro — do not + * pass expressions with side effects. */ -#define FR_DEG2RAD(x) (((x) >> 6) + ((x) >> 9) - ((x) >> 13)) -/* FR_RAD2DEG(x): multiply by 180/pi ≈ 57.295780 using shifts only. - * Worst-case relative error: ~2.1e-6. - * Side-effect note: x is referenced 7 times. 
- */ +/* FR_DEG2RAD(x): multiply by pi/180 ≈ 0.017453 (5 terms, ~17 bits) */ +#define FR_DEG2RAD(x) (((x) >> 6) + ((x) >> 9) - ((x) >> 13) - ((x) >> 19) - ((x) >> 20)) + +/* FR_RAD2DEG(x): multiply by 180/pi ≈ 57.29578 (7 terms, ~19 bits) */ #define FR_RAD2DEG(x) (((x) << 6) - ((x) << 3) + (x) + ((x) >> 2) + (((x) >> 4) - ((x) >> 6)) - ((x) >> 10)) +/* FR_DEG2BAM(x): multiply by 65536/360 ≈ 182.0449 (7 terms, ~18 bits). + * CAUTION: overflows s32 when |x| > ~256 deg at s15.16 (x<<7 term). + * For safe conversion at any radix, use fr_deg_to_bam() instead. */ +#define FR_DEG2BAM(x) (((x)<<7)+((x)<<6)-((x)<<3)-((x)<<1)+((x)>>5)+((x)>>6)-((x)>>9)) + +/* FR_BAM2DEG(x): multiply by 360/65536 = 0.00549316 (4 terms, exact) */ +#define FR_BAM2DEG(x) (((x)>>8)+((x)>>9)-((x)>>12)-((x)>>13)) + +/* FR_RAD2BAM(x): multiply by 65536/(2*pi) ≈ 10430.378 (7 terms, ~21 bits). + * CAUTION: overflows s32 when |x| > ~4 rad at s15.16 (x<<13 term). + * For safe conversion at any radix, use fr_rad_to_bam() instead. */ +#define FR_RAD2BAM(x) (((x)<<13)+((x)<<11)+((x)<<7)+((x)<<6)-((x)<<1)+((x)>>1)-((x)>>3)) + +/* ── Overflow-safe rad/deg to BAM conversion functions ───────────── + * + * These replace the FR_RAD2BAM / FR_DEG2BAM macros for callers that + * need the full ±2*pi or ±360° range at any radix. + * + * Strategy: normalize input to radix 16, conditionally reduce into + * a safe zone, apply the full-precision shift-only multiply, then + * extract the u16 BAM. No precision loss from halving/quartering. + * + * fr_rad_to_bam: reduce to [-pi, pi], reordered terms. ±2*pi safe. + * fr_deg_to_bam: reduce to [-90, 90) + quadrant offset. ±360° safe. 
+ */ + +/* Constants at radix 16 */ +#define FR_PI_R16 205887 /* round(pi * 65536) */ +#define FR_TWO_PI_R16 411775 /* round(2*pi * 65536) */ +#define FR_D90_R16 5898240 /* 90 * 65536 */ +#define FR_D180_R16 11796480 /* 180 * 65536 */ +#define FR_D360_R16 23592960 /* 360 * 65536 */ + +static u16 __attribute__((unused)) fr_rad_to_bam(s32 rad, u16 radix) +{ + /* Normalize to radix 16 */ + s32 r = (radix > 16) ? (rad >> (radix - 16)) + : (radix < 16) ? (rad << (16 - radix)) + : rad; + + /* Reduce to [-pi, pi] — one conditional pass, covers ±2*pi input */ + if (r > FR_PI_R16) r -= FR_TWO_PI_R16; + if (r < -FR_PI_R16) r += FR_TWO_PI_R16; + + /* Shift terms reordered: interleave negatives early to keep all + * intermediate sums within s32. Same 7-term decomposition as + * FR_RAD2BAM, just reordered. Safe for |r| <= 205887 (pi). */ + s32 bam = (r<<13)-(r<<1)+(r<<11)-(r>>3)+(r<<7)+(r<<6)+(r>>1); + return (u16)((bam + (1 << 15)) >> 16); +} + +static u16 __attribute__((unused)) fr_deg_to_bam(s32 deg, u16 radix) +{ + /* Normalize to radix 16 */ + s32 d = (radix > 16) ? (deg >> (radix - 16)) + : (radix < 16) ? (deg << (16 - radix)) + : deg; + + /* Reduce to [-180, 180) — covers ±360 input */ + if (d >= FR_D180_R16) d -= FR_D360_R16; + if (d < -FR_D180_R16) d += FR_D360_R16; + + /* Reduce to [-90, 90) with BAM quadrant offset. + * Needed because 182 * 11796480 (±180° at r16) overflows s32. */ + u16 offset = 0; + if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = 32768; } + else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = 32768; } + + /* |d| < 90° at r16. Max intermediate = 5898240 * 192 = 1.13B, safe. 
*/ + s32 bam = (d<<7)+(d<<6)-(d<<3)-(d<<1)+(d>>5)+(d>>6)-(d>>9); + return (u16)(offset + (u16)((bam + (1 << 15)) >> 16)); +} + +/* FR_BAM2RAD(x): multiply by 2*pi/65536 ≈ 0.0000959 (5 terms, ~18 bits) */ +#define FR_BAM2RAD(x) (((x)>>13)-((x)>>15)+((x)>>18)+((x)>>21)+((x)>>25)) + +/* Legacy quadrant macros (quadrants = BAM >> 14) */ #define FR_RAD2Q(x) (((x) >> 1) + ((x) >> 3) + ((x) >> 7) + ((x) >> 8) - ((x) >> 14)) #define FR_Q2RAD(x) ((x) + ((x) >> 1) + ((x) >> 4) + ((x) >> 7) + ((x) >> 11)) - #define FR_DEG2Q(x) (((x) >> 6) - ((x) >> 8) - ((x) >> 11) - ((x) >> 13)) #define FR_Q2DEG(x) (((x) << 6) + ((x) << 4) + ((x) << 3) + ((x) << 1)) @@ -347,44 +423,12 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { * - The top 2 bits select the quadrant (no `% 360` modulo needed). * - The next 7 bits index the 128-entry quadrant table directly. * - The bottom 7 bits give linear-interpolation precision. - * - * All BAM macros are *macros* (not functions) so they evaluate inline and - * cost nothing if you don't call them. Side-effect note: each macro - * references its argument multiple times — do not pass an expression with - * side effects. */ #define FR_BAM_BITS (16) #define FR_BAM_FULL (1L << FR_BAM_BITS) /* 65536 */ #define FR_BAM_QUADRANT (FR_BAM_FULL >> 2) /* 16384 */ #define FR_BAM_HALF (FR_BAM_FULL >> 1) /* 32768 */ -/* Convert degrees -> BAM. Exact formula: deg * 65536 / 360. - * Computed in s32; for s16-range deg the intermediate (deg << 16) fits. - * The cast to u16 wraps modulo full circle, which is mathematically correct. - * Side-effect note: deg is referenced twice for sign-aware rounding. - * - * Worst-case error: <= 0.5 LSB BAM (~0.0028 deg) per degree. No accumulation - * across full circles. - */ -#define FR_DEG2BAM(deg) ((u16)((((s32)(deg) << 16) + ((deg) >= 0 ? 180 : -180)) / 360)) - -/* Convert BAM -> degrees. bam * (360 / 65536) ≈ bam * (45/8192). - * Truncated; result is integer degrees. 
- */ -#define FR_BAM2DEG(bam) ((s16)(((s32)(u16)(bam) * 45) >> 13)) - -/* Convert radians (at given radix) -> BAM. rad * (65536 / (2*pi)) ≈ rad * 10430.378 - * For radix-16 input: ((rad * 10430) >> 16). Approximated; for high accuracy - * combine with FR_kRAD2Q multiplier. - */ -#define FR_RAD2BAM(rad, radix) ((u16)(((s32)(rad) * 10430L) >> (radix))) - -/* Convert BAM -> radians at the requested output radix. - * Derivation: rad = bam * 2π / 65536. At output radix r: bam * 2π * 2^r / 2^16 - * = bam * (2π * 2^10) / 2^(26 - r) = bam * 6434 >> (26 - r). - */ -#define FR_BAM2RAD(bam, radix) ((s32)(((s32)(u16)(bam) * 6434L) >> (26 - (radix)))) - /*=============================================== * Radian-native and BAM-native trig (recommended) * @@ -406,12 +450,16 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { */ s32 fr_cos_bam(u16 bam); s32 fr_sin_bam(u16 bam); + s32 fr_tan_bam(u16 bam); s32 fr_cos(s32 rad, u16 radix); s32 fr_sin(s32 rad, u16 radix); s32 fr_tan(s32 rad, u16 radix); -#define fr_cos_deg(deg) fr_cos_bam(FR_DEG2BAM(deg)) -#define fr_sin_deg(deg) fr_sin_bam(FR_DEG2BAM(deg)) +/* Integer degrees -> BAM using division (exact at all multiples of 45 deg). */ +#define FR_DEG2BAM_I(deg) ((u16)((((s32)(deg) << 16) + ((deg) >= 0 ? 
180 : -180)) / 360)) + +#define fr_cos_deg(deg) fr_cos_bam(FR_DEG2BAM_I(deg)) +#define fr_sin_deg(deg) fr_sin_bam(FR_DEG2BAM_I(deg)) /*=============================================== * Integer-degree trig API (thin wrappers over the BAM-native path) @@ -423,13 +471,13 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { * FR_Sin(deg, radix) — sin of fixed-radix degrees, s15.16 result * FR_Tan(deg, radix) — tan of fixed-radix degrees, s15.16 result */ -#define FR_CosI(deg) fr_cos_bam(FR_DEG2BAM(deg)) -#define FR_SinI(deg) fr_sin_bam(FR_DEG2BAM(deg)) +#define FR_CosI(deg) fr_cos_bam(FR_DEG2BAM_I(deg)) +#define FR_SinI(deg) fr_sin_bam(FR_DEG2BAM_I(deg)) - s32 FR_Cos(s16 deg, u16 radix); - s32 FR_Sin(s16 deg, u16 radix); - s32 FR_TanI(s16 deg); - s32 FR_Tan(s16 deg, u16 radix); + s32 FR_Cos(s32 deg, u16 radix); + s32 FR_Sin(s32 deg, u16 radix); + s32 FR_TanI(s32 deg); + s32 FR_Tan(s32 deg, u16 radix); /* Inverse trig — output in radians at caller-specified radix (s32). * FR_atan2 returns radians at radix 16 (s15.16). diff --git a/src/FR_tan32.c b/src/FR_tan32.c new file mode 100644 index 0000000..1f8fdec --- /dev/null +++ b/src/FR_tan32.c @@ -0,0 +1,282 @@ +/** + * @file FR_tan32.c - division-free tangent and binary-search atan2 + * + * fr_tan_bam32: hybrid tangent — table lookup + sin/cos near pole. + * 0-45°: direct u32 lerp from gFR_TAN_TAB_Q[0..64]. + * 45-75°: variable-radix u16 mantissa + shift tables (no division). + * 75-90°: sin/cos ratio from cosine table (one s64 division). + * + * fr_tan_bam32_d64: full-range sin/cos ratio from cosine table. + * Kept for comparison. One s64 division per call. + * + * fr_atan2_32: binary search on the 129-entry u32 tan quadrant table + * (gFR_TAN_TAB_Q), then quadrant mapping. 
+ * + * @copy Copyright (C) <2001-2026> + * @author M A Chatterjee + * + */ + +#include "FR_math.h" +#include "FR_trig_table.h" +#include "FR_tan_table.h" + +#ifndef FR_NO_STDINT +#include +#endif + +/*======================================================= + * cos_lerp_full — interpolated cosine from the 129-entry quadrant table. + * + * Returns cos(inq) in high-precision fixed-point (7 extra frac bits). + * Used internally by fr_tan_bam32 for the 75°-90° sin/cos path and + * by fr_tan_bam32_d64 for the full-range sin/cos path. + */ +static s32 cos_lerp_full(u32 inq) +{ + u32 idx = inq >> FR_TRIG_FRAC_BITS; + u32 frac = inq & FR_TRIG_FRAC_MASK; + s32 lo = gFR_COS_TAB_Q[idx]; + s32 d = lo - gFR_COS_TAB_Q[idx + 1]; + return (lo << FR_TRIG_FRAC_BITS) - d * (s32)frac; +} + +/*======================================================= + * fr_tan_bam32 — hybrid tangent: table lookup + sin/cos near pole. + * + * Three zones: + * 0°-45°: direct u32 lerp from gFR_TAN_TAB_Q[0..64]. + * 7-bit index + 7-bit frac. All u32, no division. + * + * 45°-75°: variable-radix u16 mantissa + u8 shift tables + * (gFR_TAN_MANT_Q2 / gFR_TAN_SHIFT_Q2). + * All u32, no division. + * + * 75°-90°: sin/cos ratio via the 129-entry cosine table. + * One s64 division. Handles the pole accurately. + * + * Poles: ±FR_TRIG_MAXVAL (90° = +, 270° = -). + * Result: s32 at radix 16 (s15.16). + */ +#define FR_TAN_OCT_HALF (1 << 13) /* 8192 = 45 deg in BAM quadrant */ +#define FR_TAN_D64_THRESH ((u32)(75.0 / 90.0 * 16384 + 0.5)) /* 13653 */ + +s32 fr_tan_bam32(u16 bam) +{ + u32 q = ((u32)bam >> 14) & 0x3; + u32 inq = (u32)bam & 0x3FFFu; + s32 sign = (q & 1) ? -1 : 1; + + /* Poles: exactly 90° or 270° */ + if (inq == 0 && (q & 1)) + return (q == 1) ? 
FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + + if (q & 1) + inq = 0x4000u - inq; + + u32 raw; + + if (inq <= FR_TAN_OCT_HALF) { + /* First octant (0°-45°): direct u32 table lookup */ + u32 idx = inq >> FR_TAN32_FRAC_BITS; + u32 frac = inq & FR_TAN32_FRAC_MASK; + u32 lo = gFR_TAN_TAB_Q[idx]; + u32 delta = gFR_TAN_TAB_Q[idx + 1] - lo; + raw = lo + ((delta * frac) >> FR_TAN32_FRAC_BITS); + } else if (inq < FR_TAN_D64_THRESH) { + /* Second octant 45°-75°: variable-radix u16+shift */ + u32 oct2 = inq - FR_TAN_OCT_HALF; + u32 idx = oct2 >> FR_TAN32_FRAC_BITS; + u32 frac = oct2 & FR_TAN32_FRAC_MASK; + + u32 m_lo = gFR_TAN_MANT_Q2[idx]; + u32 m_hi = gFR_TAN_MANT_Q2[idx + 1]; + u32 s_lo = gFR_TAN_SHIFT_Q2[idx]; + u32 s_hi = gFR_TAN_SHIFT_Q2[idx + 1]; + u32 s_max = (s_hi > s_lo) ? s_hi : s_lo; + + u32 a_lo = m_lo >> (s_max - s_lo); + u32 a_hi = m_hi >> (s_max - s_hi); + u32 delta = a_hi - a_lo; + + raw = (a_lo + ((delta * frac) >> FR_TAN32_FRAC_BITS)) << s_max; + } else { + /* 75°-90°: sin/cos ratio from cosine table (one s64 division) */ + s32 cos_val = cos_lerp_full(inq); + s32 sin_val = cos_lerp_full(FR_TAN32_QUADRANT - inq); + if (cos_val == 0) + raw = (u32)FR_TRIG_MAXVAL; + else + raw = (u32)((((s64)sin_val << 16) + ((s64)cos_val >> 1)) / (s64)cos_val); + } + + return (sign < 0) ? -(s32)raw : (s32)raw; +} + +/*======================================================= + * fr_tan_bam32_d64 — tangent via sin/cos from the cosine table. + * + * Full-range sin/cos implementation kept for comparison. + * Computes sin(x)/cos(x) using the 129-entry cosine quadrant table. + * One s64 division per call. + */ +s32 fr_tan_bam32_d64(u16 bam) +{ + u32 q = ((u32)bam >> 14) & 0x3; + u32 inq = (u32)bam & 0x3FFFu; + s32 sign = 1; + s32 sin_val, cos_val; + s32 raw; + + if (inq == 0 && (q == 0 || q == 2)) + return 0; + if (inq == 0 && (q == 1 || q == 3)) + return (q == 1) ? 
FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + + if (q == 1 || q == 3) { + inq = 0x4000u - inq; + sign = -1; + } + + cos_val = cos_lerp_full(inq); + sin_val = cos_lerp_full(FR_TAN32_QUADRANT - inq); + + if (cos_val == 0) + raw = FR_TRIG_MAXVAL; + else { + raw = (s32)((((s64)sin_val << 16) + ((s64)cos_val >> 1)) / (s64)cos_val); + } + + return (sign < 0) ? -raw : raw; +} + +/* fr_tan32: tan from radians at caller-specified radix. s15.16 result. */ +s32 fr_tan32(s32 rad, u16 radix) +{ + return fr_tan_bam32(fr_rad_to_bam(rad, radix)); +} + +/* fr_tan_deg32: tan from degrees at caller-specified radix. s15.16 result. + * radix 0 = integer degrees, radix > 0 = fixed-point degrees with that + * many fractional bits. s32 input so e.g. radix=16 gives s15.16 degrees. */ +s32 fr_tan_deg32(s32 deg, u16 radix) +{ + u16 bam = (radix == 0) ? FR_DEG2BAM_I((s16)deg) + : fr_deg_to_bam(deg, radix); + return fr_tan_bam32(bam); +} + +/*======================================================= + * fr_atan_bam32 - Arctangent via binary search on the tan table. + * + * Input: positive ratio in s15.16 (caller handles signs/quadrants). + * Output: BAM angle (u16) in [0, 0x4000) representing [0, 90 deg). + * + * Algorithm: + * 1. If x <= 0: return 0. + * 2. If x >= table[127]: return near-pole BAM (saturate ~89.3 deg). + * 3. Binary search: 7 iterations on 128 entries to bracket. + * 4. Linear interpolation within bracket for 7 fractional bits. + * 5. Assemble: bam = (idx << 7) | frac. + */ +static u16 fr_atan_bam32(s32 x) +{ + s32 lo, hi, mid; + s32 idx, d, num, frac; + u32 ux; + + if (x <= 0) + return 0; + + ux = (u32)x; + + /* Saturate near the pole */ + if (ux >= gFR_TAN_TAB_Q[127]) + return (u16)((127u << FR_TAN32_FRAC_BITS) + FR_TAN32_FRAC_MASK); + + /* Binary search: find lo such that table[lo] <= ux < table[lo+1]. + * The table is monotonically increasing. 
*/ + lo = 0; + hi = 127; + while (lo < hi) { + mid = (lo + hi + 1) >> 1; + if (gFR_TAN_TAB_Q[mid] <= ux) + lo = mid; + else + hi = mid - 1; + } + + /* lo is now the index where table[lo] <= ux < table[lo+1]. */ + idx = lo; + + /* Linear interpolation within the bracket */ + d = (s32)(gFR_TAN_TAB_Q[idx + 1] - gFR_TAN_TAB_Q[idx]); + num = (s32)(ux - gFR_TAN_TAB_Q[idx]); + if (d > 0) + frac = (s32)(((s64)num << FR_TAN32_FRAC_BITS) / d); + else + frac = 0; + + if (frac > FR_TAN32_FRAC_MASK) + frac = FR_TAN32_FRAC_MASK; + + return (u16)(((u32)idx << FR_TAN32_FRAC_BITS) + (u32)frac); +} + +/*======================================================= + * fr_atan2_32 - Full-circle atan2 using the tan table binary search. + * + * Input: y, x as s32 values at radix 16 (s15.16). + * Output: radians at out_radix. + * Range: [-pi, pi]. Returns 0 for atan2(0, 0). + * + * Algorithm: + * 1. Handle axis cases. + * 2. Compute ratio = |y| / |x| or |x| / |y| (whichever <= 1.0) in s15.16. + * 3. Binary search -> BAM angle in [0, pi/4]. + * 4. If |y| > |x|: angle = pi/2 - angle. + * 5. Apply quadrant from signs of x and y. + */ +s32 fr_atan2_32(s32 y, s32 x, u16 out_radix) +{ + s32 ax, ay, ratio; + u16 bam; + s32 angle; + s32 pi, half_pi; + + pi = FR_CHRDX(FR_kPI, FR_kPREC, out_radix); + half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); + + /* Axis cases */ + if (x == 0) { + if (y > 0) return half_pi; + if (y < 0) return -half_pi; + return 0; + } + if (y == 0) + return (x > 0) ? 0 : pi; + + ax = (x < 0) ? -x : x; + ay = (y < 0) ? -y : y; + + /* Compute ratio in s15.16. Use the smaller/larger to stay in [0, 1.0] + * for the initial lookup, then complement if needed. 
*/ + if (ay <= ax) { + /* angle in [0, 45 deg]: ratio = ay/ax */ + ratio = (s32)(((s64)ay << 16) / ax); + bam = fr_atan_bam32(ratio); + /* Convert BAM to radians at out_radix */ + angle = FR_CHRDX(FR_Q2RAD(bam), 14, out_radix); + } else { + /* angle in (45, 90 deg): ratio = ax/ay, angle = pi/2 - atan(ratio) */ + ratio = (s32)(((s64)ax << 16) / ay); + bam = fr_atan_bam32(ratio); + angle = half_pi - FR_CHRDX(FR_Q2RAD(bam), 14, out_radix); + } + + /* Apply quadrant from signs of x and y */ + if (x > 0) + return (y > 0) ? angle : -angle; + else + return (y > 0) ? (pi - angle) : (angle - pi); +} diff --git a/src/FR_tan_table.h b/src/FR_tan_table.h new file mode 100644 index 0000000..bdee54c --- /dev/null +++ b/src/FR_tan_table.h @@ -0,0 +1,115 @@ +/** + * @file FR_tan_table.h - tangent quadrant tables (u32, s15.16) + * + * Master table: gFR_TAN_TAB_Q[129] + * 129 entries covering [0, pi/2] in s15.16 fixed-point. + * table[i] = round(tan(i * pi/2 / 128) * 65536), i=0..127 + * table[128] = 0x7FFFFFFF (pole saturation) + * 7-bit index + 7-bit lerp from 14-bit in-quadrant BAM. + * + * Used by: + * fr_tan_bam32(): entries 0-64 directly (first octant, 0°-45°) + * fr_atan_bam32(): all 129 entries for binary-search arctangent + * + * Second-octant variable-radix tables (derived from entries 64-128): + * gFR_TAN_MANT_Q2[65]: u16 mantissa (top 16 bits) + * gFR_TAN_SHIFT_Q2[65]: u8 shift (bits to left-shift) + * Used by fr_tan_bam32() for division-free 45°-90° path. + * + * Total ROM: 129×4 + 65×2 + 65×1 = 711 bytes + * + * @copy Copyright (C) <2001-2026> + * @author M A Chatterjee + * + * Same zlib license as the rest of the library. 
+ */ +#ifndef __FR_TAN_TABLE_H__ +#define __FR_TAN_TABLE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef __FR_Platform_Defs_H__ +#include "FR_defs.h" +#endif + +/* ── 129-entry table (used by atan binary search) ───────── */ +#define FR_TAN32_TABLE_BITS (7) +#define FR_TAN32_TABLE_SIZE ((1 << FR_TAN32_TABLE_BITS) + 1) /* 129 */ +#define FR_TAN32_FRAC_BITS (14 - FR_TAN32_TABLE_BITS) /* 7 */ +#define FR_TAN32_FRAC_MAX (1 << FR_TAN32_FRAC_BITS) /* 128 */ +#define FR_TAN32_FRAC_MASK (FR_TAN32_FRAC_MAX - 1) /* 0x7F */ +#define FR_TAN32_FRAC_HALF (FR_TAN32_FRAC_MAX >> 1) /* 64 */ +#define FR_TAN32_QUADRANT (1 << 14) /* 16384 */ + +static const u32 gFR_TAN_TAB_Q[FR_TAN32_TABLE_SIZE] = { + 0, 804, 1609, 2414, + 3220, 4026, 4834, 5644, + 6455, 7268, 8083, 8901, + 9721, 10545, 11372, 12202, + 13036, 13874, 14717, 15564, + 16416, 17273, 18136, 19005, + 19880, 20762, 21650, 22546, + 23449, 24360, 25280, 26208, + 27146, 28093, 29050, 30018, + 30996, 31986, 32988, 34002, + 35030, 36071, 37126, 38196, + 39281, 40382, 41500, 42636, + 43790, 44963, 46156, 47369, + 48605, 49863, 51145, 52451, + 53784, 55144, 56532, 57950, + 59398, 60880, 62395, 63947, + 65536, 67165, 68835, 70548, + 72308, 74116, 75974, 77887, + 79856, 81885, 83977, 86135, + 88365, 90670, 93054, 95523, + 98082, 100736, 103493, 106358, + 109340, 112447, 115687, 119071, + 122609, 126314, 130198, 134276, + 138564, 143081, 147847, 152884, + 158218, 163878, 169896, 176309, + 183161, 190499, 198380, 206870, + 216043, 225990, 236817, 248648, + 261634, 275959, 291845, 309568, + 329472, 351993, 377693, 407305, + 441808, 482534, 531352, 590958, + 665398, 761030, 888450, 1066730, + 1334016, 1779314, 2669641, 5340086, + 2147483647 +}; + +/* ── Second-octant variable-radix tables (used by forward tan) ── */ + +/* Mantissa table: top 16 bits of gFR_TAN_TAB_Q[64..128]. + * gFR_TAN_MANT_Q2[i] = gFR_TAN_TAB_Q[64+i] >> gFR_TAN_SHIFT_Q2[i] + * 65 entries × 2 bytes = 130 bytes ROM. 
+ */ +static const u16 gFR_TAN_MANT_Q2[65] = { + 32768, 33582, 34417, 35274, 36154, 37058, 37987, 38943, + 39928, 40942, 41988, 43067, 44182, 45335, 46527, 47761, + 49041, 50368, 51746, 53179, 54670, 56223, 57843, 59535, + 61304, 63157, 65099, 33569, 34641, 35770, 36961, 38221, + 39554, 40969, 42474, 44077, 45790, 47624, 49595, 51717, + 54010, 56497, 59204, 62162, 65408, 34494, 36480, 38696, + 41184, 43999, 47211, 50913, 55226, 60316, 33209, 36934, + 41587, 47564, 55528, 33335, 41688, 55603, 41713, 41719, + 65535 +}; + +/* Shift table: bits to left-shift mantissa to reconstruct s15.16 value. + * 65 entries × 1 byte = 65 bytes ROM. + */ +static const u8 gFR_TAN_SHIFT_Q2[65] = { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 6, 7, + 15 +}; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* __FR_TAN_TABLE_H__ */ diff --git a/src/FR_trig_table.h b/src/FR_trig_table.h index 03a34cd..983c4e2 100644 --- a/src/FR_trig_table.h +++ b/src/FR_trig_table.h @@ -60,10 +60,45 @@ static const short gFR_COS_TAB_Q[FR_TRIG_TABLE_SIZE] = { 12539, 12167, 11793, 11417, 11039, 10659, 10278, 9896, 9512, 9126, 8739, 8351, 7962, 7571, 7179, 6786, 6393, 5998, 5602, 5205, 4808, 4410, 4011, 3612, - 3212, 2811, 2410, 2009, 1608, 1206, 804, 402, + 3212, 2811, 2410, 2009, 1608, 1206, 804, 401, 0 }; +/* ---- Tangent table: 65 entries covering one octant [0, pi/4] ---- + * + * gFR_TAN_TAB_O[i] = round(tan(i * pi/4 / 64) * 32768) for i = 0..64 + * + * Output format: u0.15 stored as u16 (unsigned, 15 fractional bits). + * Entry[64] = 32768 (tan 45° = 1.0 exactly) requires u16; it does not + * fit in s16. + * + * The table is used by fr_tan_bam() in FR_math.c. The first-octant + * lookup gives a raw result in u0.15 which is then shifted to s15.16. 
+ * Second-octant (>45°) uses the reciprocal identity: + * tan(pi/2 - x) = 1/tan(x) + * + * 130 bytes ROM. + */ +#define FR_TAN_TABLE_BITS (6) +#define FR_TAN_TABLE_SIZE ((1 << FR_TAN_TABLE_BITS) + 1) /* 65 */ +#define FR_TAN_FRAC_BITS (13 - FR_TAN_TABLE_BITS) /* 7 */ +#define FR_TAN_FRAC_MAX (1 << FR_TAN_FRAC_BITS) /* 128 */ +#define FR_TAN_FRAC_MASK (FR_TAN_FRAC_MAX - 1) /* 0x7F */ +#define FR_TAN_FRAC_HALF (FR_TAN_FRAC_MAX >> 1) /* 64 */ +#define FR_TAN_OCTANT (1 << 13) /* 8192 */ + +static const unsigned short gFR_TAN_TAB_O[FR_TAN_TABLE_SIZE] = { + 0, 402, 804, 1207, 1610, 2013, 2417, 2822, + 3227, 3634, 4042, 4450, 4861, 5272, 5686, 6101, + 6518, 6937, 7358, 7782, 8208, 8637, 9068, 9503, + 9940, 10381, 10825, 11273, 11725, 12180, 12640, 13104, + 13573, 14046, 14525, 15009, 15498, 15993, 16494, 17001, + 17515, 18035, 18563, 19098, 19640, 20191, 20750, 21318, + 21895, 22481, 23078, 23685, 24302, 24931, 25572, 26226, + 26892, 27572, 28266, 28975, 29699, 30440, 31198, 31973, + 32768 +}; + #ifdef __cplusplus } // extern "C" #endif diff --git a/tests/test_full_coverage.c b/tests/test_full_coverage.c index 0dfd248..36c00f0 100644 --- a/tests/test_full_coverage.c +++ b/tests/test_full_coverage.c @@ -188,7 +188,7 @@ int test_div() { int test_trig_complete() { s16 result; s32 result32; - + /* Test CosI with all quadrants and edge cases */ result = FR_CosI(0); result = FR_CosI(45); @@ -199,51 +199,113 @@ int test_trig_complete() { result = FR_CosI(270); result = FR_CosI(315); result = FR_CosI(360); - + /* Test angles > 180 to hit the branch */ result = FR_CosI(200); /* > 180, will subtract 360 */ result = FR_CosI(350); /* > 180, will subtract 360 */ - + /* Test angles < -180 to hit that branch */ result = FR_CosI(-200); /* < -180, will add 360 */ result = FR_CosI(-350); /* < -180, will add 360 */ - + /* Test SinI */ result = FR_SinI(0); result = FR_SinI(90); result = FR_SinI(180); result = FR_SinI(270); - + /* Test FR_Cos with radix (interpolated) */ result = FR_Cos(45, 
8); result = FR_Cos(90, 8); result = FR_Cos(180, 8); - + /* Test FR_Sin with radix */ result = FR_Sin(45, 8); result = FR_Sin(90, 8); - + /* Test TanI with all special cases */ result32 = FR_TanI(0); + if (result32 != 0) return TEST_FAIL; /* tan(0°) = 0 */ result32 = FR_TanI(45); - result32 = FR_TanI(90); /* Special case: returns max */ + if (result32 != 65536) return TEST_FAIL; /* tan(45°) = 1.0 = 65536 */ + result32 = FR_TanI(90); + if (result32 != FR_TRIG_MAXVAL) return TEST_FAIL; /* pole: +max */ result32 = FR_TanI(135); + if (result32 != -65536) return TEST_FAIL; /* tan(135°) = -1.0 */ result32 = FR_TanI(180); - result32 = FR_TanI(270); /* Special case: returns -max */ - result32 = FR_TanI(-45); /* Negative angle */ - result32 = FR_TanI(-90); /* Negative 90 */ + if (result32 != 0) return TEST_FAIL; /* tan(180°) = 0 */ + result32 = FR_TanI(270); + if (result32 != FR_TRIG_MAXVAL) return TEST_FAIL; /* pole: +max (positive deg) */ + result32 = FR_TanI(-45); + if (result32 != -65536) return TEST_FAIL; /* tan(-45°) = -1.0 */ + result32 = FR_TanI(-90); + if (result32 != -FR_TRIG_MAXVAL) return TEST_FAIL; /* pole: -max */ result32 = FR_TanI(200); /* > 180 */ result32 = FR_TanI(-200); /* < -180 */ - + /* Test FR_Tan with radix */ result32 = FR_Tan(45, 8); result32 = FR_Tan(30, 8); - + (void)result; (void)result32; return TEST_PASS; } +/* Test fr_tan_bam BAM-native tangent */ +int test_tan_bam() { + s32 result; + + /* Exact zeros: 0° and 180° */ + result = fr_tan_bam(0); /* 0° */ + if (result != 0) return TEST_FAIL; + result = fr_tan_bam(0x8000); /* 180° */ + if (result != 0) return TEST_FAIL; + + /* Exact poles: 90° and 270° */ + result = fr_tan_bam(0x4000); /* 90° = +pole */ + if (result != FR_TRIG_MAXVAL) return TEST_FAIL; + result = fr_tan_bam(0xC000); /* 270° = -pole */ + if (result != -FR_TRIG_MAXVAL) return TEST_FAIL; + + /* 45° = 0x2000: tan(45°) = 1.0 = 65536 in s15.16 */ + result = fr_tan_bam(0x2000); + if (result != 65536) return TEST_FAIL; + + /* 135° = 
0x6000: tan(135°) = -1.0 */ + result = fr_tan_bam(0x6000); + if (result != -65536) return TEST_FAIL; + + /* 225° = 0xA000: tan(225°) = 1.0 (same as 45°) */ + result = fr_tan_bam(0xA000); + if (result != 65536) return TEST_FAIL; + + /* 315° = 0xE000: tan(315°) = -1.0 */ + result = fr_tan_bam(0xE000); + if (result != -65536) return TEST_FAIL; + + /* 30° ≈ BAM 5461: tan(30°) = 1/sqrt(3) ≈ 0.57735 → 37837 in s15.16 + * Allow ±50 LSB for table interpolation error */ + result = fr_tan_bam(5461); + if (result < 37700 || result > 37950) return TEST_FAIL; + + /* 60° ≈ BAM 10923: tan(60°) = sqrt(3) ≈ 1.73205 → 113512 in s15.16 + * This exercises the second-octant (reciprocal) path. Allow ±200 LSB. */ + result = fr_tan_bam(10923); + if (result < 113200 || result > 113800) return TEST_FAIL; + + /* Near-pole: 89° ≈ BAM 16202: tan(89°) ≈ 57.29 → huge. + * Just verify it's large and positive. */ + result = fr_tan_bam(16202); + if (result < 3000000) return TEST_FAIL; /* > 45.8 in s15.16 */ + + /* Near-pole: 91° ≈ BAM 16566: tan(91°) ≈ -57.29 → large negative */ + result = fr_tan_bam(16566); + if (result > -3000000) return TEST_FAIL; + + return TEST_PASS; +} + /* Test inverse trig functions */ int test_inverse_trig() { s32 result, input; @@ -748,8 +810,8 @@ int test_edge_branches() { * cos==0 and we hit the saturation return. */ r32 = FR_Tan(90, 0); /* bam=16384 (sin>0) */ if (r32 != FR_TRIG_MAXVAL) return TEST_FAIL; - r32 = FR_Tan(270, 0); /* bam=49152 (sin<0) */ - if (r32 != -FR_TRIG_MAXVAL) return TEST_FAIL; + r32 = FR_Tan(270, 0); /* pole: positive deg → +MAXVAL */ + if (r32 != FR_TRIG_MAXVAL) return TEST_FAIL; /* FR_atan2 now returns radians at out_radix. * At radix 16: pi/2 ≈ 102944, pi ≈ 205887. 
@@ -1031,6 +1093,7 @@ int main() { printf("\nTrigonometry (Complete):\n"); RUN_TEST(test_trig_complete); + RUN_TEST(test_tan_bam); RUN_TEST(test_inverse_trig); printf("\nLogarithms & Powers (Complete):\n"); diff --git a/tests/test_full_sweep.c b/tests/test_full_sweep.c new file mode 100644 index 0000000..64d7365 --- /dev/null +++ b/tests/test_full_sweep.c @@ -0,0 +1,346 @@ +/** + * test_full_sweep.c — exhaustive error sweep for cos and tan (old & new) + * + * Three independent sweeps, each in its native input domain: + * BAM: all 65536 u16 values (0..65535) + * Radian: every s15.16 LSB from -2pi to +2pi (~823k values) + * Degree: fr_tan_deg32(s32,16) at s15.16, 1/1024 deg steps, ±360 deg (~738k) + * FR_Tan(s16,6) at s9.6 for old (s16 limits range) + * FR_TanI(deg) tested at integer-degree-aligned subset + * + * Error metrics: + * cos: % of full scale (1.0). |comp/65536 - ref| * 100 + * tan: relative % when |ref| >= 0.01, else absolute % of 1.0 + * Skipped when |ref| > 1000 (near-pole, unrepresentable in s15.16) + * + * Also reports average ns/call for each function. 
+ */ + +#include +#include +#include +#include +#include "FR_math.h" +#include "FR_trig_table.h" +#include "FR_tan_table.h" + +/* FR_tan32.c functions */ +extern s32 fr_tan_bam32(u16 bam); +extern s32 fr_tan32(s32 rad, u16 radix); +extern s32 fr_tan_deg32(s32 deg, u16 radix); + +/* ── sweep accumulator ─────────────────────────────── */ + +typedef struct { + const char *name; + double peak_err; + double ref_at_peak; + s32 val_at_peak; + double sum_err; + long count; + char peak_label[64]; + double total_ns; + long time_count; +} sweep_t; + +static void sw_init(sweep_t *s, const char *name) +{ + memset(s, 0, sizeof(*s)); + s->name = name; +} + +static void sw_cos(sweep_t *s, double ref, s32 comp, const char *label) +{ + double comp_dbl = (double)comp / 65536.0; + double pct = fabs(comp_dbl - ref) * 100.0; + s->sum_err += pct; + s->count++; + if (pct > s->peak_err) { + s->peak_err = pct; + s->ref_at_peak = ref; + s->val_at_peak = comp; + strncpy(s->peak_label, label, sizeof(s->peak_label) - 1); + } +} + +#define TAN_CLIP 1000.0 +#define TAN_ZERO 0.01 + +static void sw_tan(sweep_t *s, double ref, s32 comp, const char *label) +{ + if (fabs(ref) > TAN_CLIP) return; + double comp_dbl = (double)comp / 65536.0; + double abs_err = fabs(comp_dbl - ref); + double pct = (fabs(ref) >= TAN_ZERO) + ? 
(abs_err / fabs(ref)) * 100.0 + : abs_err * 100.0; + s->sum_err += pct; + s->count++; + if (pct > s->peak_err) { + s->peak_err = pct; + s->ref_at_peak = ref; + s->val_at_peak = comp; + strncpy(s->peak_label, label, sizeof(s->peak_label) - 1); + } +} + +static double now_ns(void) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (double)ts.tv_sec * 1e9 + (double)ts.tv_nsec; +} + +static void sw_header(void) +{ + printf("| %-26s | %10s | %10s | %7s | %-30s | %12s | %10s |\n", + "Function", "Peak Err", "Avg Err", "ns/call", + "Peak At", "Ref Value", "Got (s32)"); + printf("| %-26s | %10s | %10s | %7s | %-30s | %12s | %10s |\n", + "--------------------------", "----------", "----------", "-------", + "------------------------------", "------------", "----------"); +} + +static void sw_print(const sweep_t *s) +{ + double avg = (s->count > 0) ? s->sum_err / (double)s->count : 0.0; + double ns = (s->time_count > 0) ? s->total_ns / (double)s->time_count : 0.0; + printf("| %-26s | %9.4f%% | %9.5f%% | %5.1f | %-30s | %12.6f | %10d |\n", + s->name, s->peak_err, avg, ns, s->peak_label, + s->ref_at_peak, (int)s->val_at_peak); +} + +/* ════════════════════════════════════════════════════════ + * BAM sweep: all 65536 u16 values + * ════════════════════════════════════════════════════════ */ +static void sweep_bam(void) +{ + sweep_t cos_old, tan_old, tan_new; + sw_init(&cos_old, "fr_cos_bam"); + sw_init(&tan_old, "fr_tan_bam (old)"); + sw_init(&tan_new, "fr_tan_bam32 (new)"); + + for (long b = 0; b < 65536; b++) { + u16 bam = (u16)b; + double rad = (double)bam * 2.0 * M_PI / 65536.0; + char label[64]; + snprintf(label, sizeof(label), "BAM %5u (%7.2f deg)", + bam, (double)bam * 360.0 / 65536.0); + + sw_cos(&cos_old, cos(rad), fr_cos_bam(bam), label); + sw_tan(&tan_old, tan(rad), fr_tan_bam(bam), label); + sw_tan(&tan_new, tan(rad), fr_tan_bam32(bam), label); + } + + /* timing */ + { + volatile s32 sink = 0; + double t0, t1; + long N = 65536; + + t0 = 
now_ns(); + for (long b = 0; b < N; b++) sink += fr_cos_bam((u16)b); + t1 = now_ns(); + cos_old.total_ns = t1 - t0; cos_old.time_count = N; + + t0 = now_ns(); + for (long b = 0; b < N; b++) sink += fr_tan_bam((u16)b); + t1 = now_ns(); + tan_old.total_ns = t1 - t0; tan_old.time_count = N; + + t0 = now_ns(); + for (long b = 0; b < N; b++) sink += fr_tan_bam32((u16)b); + t1 = now_ns(); + tan_new.total_ns = t1 - t0; tan_new.time_count = N; + + (void)sink; + } + + printf("### BAM domain — all 65536 u16 values\n\n"); + sw_header(); + sw_print(&cos_old); + sw_print(&tan_old); + sw_print(&tan_new); + printf("\ntan samples: old=%ld, new=%ld (rest skipped near poles)\n\n", + tan_old.count, tan_new.count); +} + +/* ════════════════════════════════════════════════════════ + * Radian sweep: every s15.16 LSB from -2pi to +2pi + * ════════════════════════════════════════════════════════ */ +static void sweep_rad(void) +{ + sweep_t cos_old, tan_old, tan_new; + sw_init(&cos_old, "fr_cos (s15.16)"); + sw_init(&tan_old, "fr_tan (s15.16)"); + sw_init(&tan_new, "fr_tan32 (s15.16)"); + + s32 two_pi = (s32)(2.0 * M_PI * 65536.0 + 0.5); /* 411775 */ + long total = 0; + + for (s32 r = -two_pi; r <= two_pi; r++) { + double rad = (double)r / 65536.0; + char label[64]; + snprintf(label, sizeof(label), "r16=%d (%.4f rad)", r, rad); + + sw_cos(&cos_old, cos(rad), fr_cos(r, 16), label); + sw_tan(&tan_old, tan(rad), fr_tan(r, 16), label); + sw_tan(&tan_new, tan(rad), fr_tan32(r, 16), label); + total++; + } + + /* timing */ + { + volatile s32 sink = 0; + double t0, t1; + long N = 65536; + s32 step = (2 * two_pi) / N; + if (step < 1) step = 1; + + t0 = now_ns(); + for (s32 r = -two_pi; r <= two_pi; r += step) sink += fr_cos(r, 16); + t1 = now_ns(); + cos_old.total_ns = t1 - t0; cos_old.time_count = N; + + t0 = now_ns(); + for (s32 r = -two_pi; r <= two_pi; r += step) sink += fr_tan(r, 16); + t1 = now_ns(); + tan_old.total_ns = t1 - t0; tan_old.time_count = N; + + t0 = now_ns(); + for (s32 r = 
-two_pi; r <= two_pi; r += step) sink += fr_tan32(r, 16); + t1 = now_ns(); + tan_new.total_ns = t1 - t0; tan_new.time_count = N; + + (void)sink; + } + + printf("### Radian domain — every s15.16 LSB, -2pi..+2pi (%ld values)\n\n", total); + sw_header(); + sw_print(&cos_old); + sw_print(&tan_old); + sw_print(&tan_new); + printf("\ntan samples: old=%ld, new=%ld\n\n", tan_old.count, tan_new.count); +} + +/* ════════════════════════════════════════════════════════ + * Degree sweep: all 65536 s16 values at radix 6 (s9.6) + * s15.16 degrees: every LSB from -360*65536 to +360*65536 (~823k values) + * FR_Tan(deg,16) — old, s16 input limits to ±0.5 deg (too narrow!) + * fr_tan_deg32(deg,16) — new, s32 input, full s15.16 range + * FR_TanI(deg) — integer degrees (tested at integer-aligned subset) + * + * NOTE: FR_Tan still takes s16, so its s15.16 sweep only covers ±0.5 deg. + * To get a fair comparison we ALSO test FR_Tan at radix=6 (s9.6, ±512 deg). + * ════════════════════════════════════════════════════════ */ +static void sweep_deg(void) +{ + sweep_t cos_old, tan_old_s16, tan_new_full, tan_old_int; + sw_init(&cos_old, "FR_Cos (s9.6 deg)"); + sw_init(&tan_old_s16, "FR_Tan (s9.6 deg, s16)"); + sw_init(&tan_new_full, "fr_tan_deg32 (s15.16 deg)"); + sw_init(&tan_old_int, "FR_TanI (int deg)"); + + /* New path: s15.16 degrees, every LSB from -360 to +360. + * 360 * 65536 = 23592960. Total ~47M values — too many. + * Use same density as radian sweep: ~823k values. + * -360..+360 deg = 720 deg range. 823551 / 720 ≈ 1144 steps/deg. + * That's close to radix=10 (1024 steps/deg). Use radix=16 with + * step = 65536/1024 = 64 to get ~720k values. 
*/ + s32 deg360_s16 = 360L * 65536; + s32 step_new = 64; /* every 64th LSB of s15.16 = 1/1024 deg */ + long total_new = 0; + + for (s32 d = -deg360_s16; d <= deg360_s16; d += step_new) { + double deg_dbl = (double)d / 65536.0; + double rad = deg_dbl * M_PI / 180.0; + char label[64]; + snprintf(label, sizeof(label), "d16=%d (%.4f deg)", (int)d, deg_dbl); + + double rt = tan(rad); + sw_tan(&tan_new_full, rt, fr_tan_deg32(d, 16), label); + + /* FR_TanI at integer-degree subset */ + if (d % 65536 == 0) { + s16 ideg = (s16)(d / 65536); + char ilabel[64]; + snprintf(ilabel, sizeof(ilabel), "deg=%d", ideg); + sw_tan(&tan_old_int, rt, FR_TanI(ideg), ilabel); + } + + total_new++; + } + + /* Old path: FR_Tan takes s16, so use radix=6 (s9.6) to cover ±512 deg */ + long total_old = 0; + for (long d = -32768; d <= 32767; d++) { + s16 dval = (s16)d; + double deg_dbl = (double)d / 64.0; + double rad = deg_dbl * M_PI / 180.0; + char label[64]; + snprintf(label, sizeof(label), "d6=%d (%.3f deg)", (int)d, deg_dbl); + + sw_cos(&cos_old, cos(rad), FR_Cos(dval, 6), label); + sw_tan(&tan_old_s16, tan(rad), FR_Tan(dval, 6), label); + total_old++; + } + + /* timing */ + { + volatile s32 sink = 0; + double t0, t1; + long N = 65536; + + t0 = now_ns(); + for (long d = -32768; d <= 32767; d++) sink += FR_Cos((s16)d, 6); + t1 = now_ns(); + cos_old.total_ns = t1 - t0; cos_old.time_count = N; + + t0 = now_ns(); + for (long d = -32768; d <= 32767; d++) sink += FR_Tan((s16)d, 6); + t1 = now_ns(); + tan_old_s16.total_ns = t1 - t0; tan_old_s16.time_count = N; + + s32 tstep = (2 * deg360_s16) / N; + t0 = now_ns(); + for (s32 d = -deg360_s16; d <= deg360_s16; d += tstep) + sink += fr_tan_deg32(d, 16); + t1 = now_ns(); + tan_new_full.total_ns = t1 - t0; tan_new_full.time_count = N; + + t0 = now_ns(); + for (long i = -360; i < 360; i++) sink += FR_TanI((s16)i); + t1 = now_ns(); + tan_old_int.total_ns = t1 - t0; tan_old_int.time_count = 720; + + (void)sink; + } + + printf("### Degree domain\n\n"); + 
/* Error of `got` against `ref`, expressed in percent.
 * When |ref| is at least 0.01 the error is relative to |ref|; closer to
 * zero it is the absolute difference (i.e. relative to 1.0), which avoids
 * dividing by a near-zero reference. */
static double err_pct(double ref, double got)
{
    const double magnitude = fabs(ref);
    const double scale = (magnitude >= 0.01) ? magnitude : 1.0;

    return (fabs(got - ref) / scale) * 100.0;
}
+ "tan_bam OLD", "err%", + "tan_bam32 NEW", "err%", + "tan(rad) NEW", "err%", + "tan(deg) NEW", "err%"); + printf("| %5s | %9s | %14s | %14s %7s | %14s %7s | %14s %7s | %14s %7s |\n", + "-----", "---------", "--------------", + "--------------", "-------", + "--------------", "-------", + "--------------", "-------", + "--------------", "-------"); + + for (int i = -range; i <= range; i++) { + u16 bam = (u16)((int)pole_bam + i); + double rad_dbl = (double)bam * 2.0 * M_PI / 65536.0; + double deg_dbl = (double)bam * 360.0 / 65536.0; + double truth = tan(rad_dbl); + + /* BAM functions */ + double v_bam_old = to_dbl(fr_tan_bam(bam)); + double v_bam_new = to_dbl(fr_tan_bam32(bam)); + + /* Radian: convert BAM to s15.16 radian the same way the library does */ + s32 r16 = (s32)(rad_dbl * 65536.0 + (rad_dbl >= 0 ? 0.5 : -0.5)); + double v_rad_new = to_dbl(fr_tan32(r16, 16)); + + /* Degree: convert to s9.6 */ + s16 d6 = (s16)(int)(deg_dbl * 64.0 + (deg_dbl >= 0 ? 0.5 : -0.5)); + double v_deg_new = to_dbl(fr_tan_deg32(d6, 6)); + + /* Clip display for readability */ + if (fabs(truth) > 100000.0) { + printf("| %5u | %9.3f | %14s | %14s %7s | %14s %7s | %14s %7s | %14s %7s |\n", + bam, deg_dbl, ">>pole<<", + "---", "---", "---", "---", "---", "---", "---", "---"); + continue; + } + + printf("| %5u | %9.3f | %14.4f | %14.4f %6.2f%% | %14.4f %6.2f%% | %14.4f %6.2f%% | %14.4f %6.2f%% |\n", + bam, deg_dbl, truth, + v_bam_old, err_pct(truth, v_bam_old), + v_bam_new, err_pct(truth, v_bam_new), + v_rad_new, err_pct(truth, v_rad_new), + v_deg_new, err_pct(truth, v_deg_new)); + } +} + +int main(void) +{ + printf("FR_Math tan pole neighborhood dump\n"); + printf("==================================\n"); + printf("Values within ±20 BAM steps of each pole.\n"); + printf("Error: relative %% when |ref|>=0.01, absolute otherwise.\n"); + + /* 90° pole = BAM 16384, 270° pole = BAM 49152 */ + dump_pole(16384, "90 deg", 20); + dump_pole(49152, "270 deg", 20); + + printf("\nDone.\n"); + return 0; 
+} diff --git a/tests/test_sweep_csv.c b/tests/test_sweep_csv.c new file mode 100644 index 0000000..5b33cbc --- /dev/null +++ b/tests/test_sweep_csv.c @@ -0,0 +1,149 @@ +/** + * test_sweep_csv.c — emit CSV + summary for all 65536 BAM values + * + * Compares 3 tan implementations: + * fr_tan_bam (old): 65-entry u16 octant table + reciprocal + * fr_tan_bam32_d64: sin/cos from 129-entry cos table, s64 div + * fr_tan_bam32 (new): direct 65-entry u32 tan table lookup, no div + * + * Ground truth clamped to ±SAT_MAX for fair pole comparison. + * + * Output: build/tan_sweep.csv + */ + +#include +#include +#include +#include "FR_math.h" +#include "FR_trig_table.h" +#include "FR_tan_table.h" + +extern s32 fr_tan_bam32(u16 bam); +extern s32 fr_tan_bam32_d64(u16 bam); + +#define SAT_MAX (32767.999984741211) + +static double to_dbl(s32 v) { return (double)v / 65536.0; } + +static double clamp(double v) +{ + if (v > SAT_MAX) return SAT_MAX; + if (v < -SAT_MAX) return -SAT_MAX; + return v; +} + +static double err_pct(double ref, double got) +{ + if (fabs(ref) >= SAT_MAX && fabs(got) >= SAT_MAX) + return 0.0; + double ae = fabs(got - ref); + if (fabs(ref) >= 0.01) + return (ae / fabs(ref)) * 100.0; + return ae * 100.0; +} + +static double now_ns(void) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (double)ts.tv_sec * 1e9 + (double)ts.tv_nsec; +} + +int main(void) +{ + FILE *fp = fopen("build/tan_sweep.csv", "w"); + if (!fp) { perror("fopen"); return 1; } + + fprintf(fp, "bam,degrees,tan_truth," + "tan_old,tan_d64,tan_direct," + "err_old,err_d64,err_direct\n"); + + for (long b = 0; b < 65536; b++) { + u16 bam = (u16)b; + double deg = (double)bam * 360.0 / 65536.0; + double rad = (double)bam * 2.0 * M_PI / 65536.0; + double truth = clamp(tan(rad)); + + double v_old = to_dbl(fr_tan_bam(bam)); + double v_d64 = to_dbl(fr_tan_bam32_d64(bam)); + double v_direct = to_dbl(fr_tan_bam32(bam)); + + fprintf(fp, "%u,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f\n", + bam, 
deg, truth, + v_old, v_d64, v_direct, + err_pct(truth, v_old), + err_pct(truth, v_d64), + err_pct(truth, v_direct)); + } + + fclose(fp); + + /* Timing */ + volatile s32 sink = 0; + double t0, t1; + long N = 65536; + + t0 = now_ns(); + for (long b = 0; b < N; b++) sink += fr_tan_bam((u16)b); + t1 = now_ns(); + double ns_old = (t1 - t0) / N; + + t0 = now_ns(); + for (long b = 0; b < N; b++) sink += fr_tan_bam32_d64((u16)b); + t1 = now_ns(); + double ns_d64 = (t1 - t0) / N; + + t0 = now_ns(); + for (long b = 0; b < N; b++) sink += fr_tan_bam32((u16)b); + t1 = now_ns(); + double ns_direct = (t1 - t0) / N; + + (void)sink; + + /* Stats */ + printf("Wrote build/tan_sweep.csv (65536 rows)\n\n"); + + double peak_old = 0, peak_d64 = 0, peak_dir = 0; + double sum_old = 0, sum_d64 = 0, sum_dir = 0; + int peak_bam_old = 0, peak_bam_d64 = 0, peak_bam_dir = 0; + + for (long b = 0; b < 65536; b++) { + u16 bam = (u16)b; + double rad = (double)bam * 2.0 * M_PI / 65536.0; + double truth = clamp(tan(rad)); + + double e_old = err_pct(truth, to_dbl(fr_tan_bam(bam))); + double e_d64 = err_pct(truth, to_dbl(fr_tan_bam32_d64(bam))); + double e_dir = err_pct(truth, to_dbl(fr_tan_bam32(bam))); + + sum_old += e_old; sum_d64 += e_d64; sum_dir += e_dir; + if (e_old > peak_old) { peak_old = e_old; peak_bam_old = bam; } + if (e_d64 > peak_d64) { peak_d64 = e_d64; peak_bam_d64 = bam; } + if (e_dir > peak_dir) { peak_dir = e_dir; peak_bam_dir = bam; } + } + + printf("| %-24s | %5s | %10s | %10s | %7s | %-24s |\n", + "Implementation", "Table", "Peak Err", "Avg Err", "ns/call", "Peak At"); + printf("| %-24s | %5s | %10s | %10s | %7s | %-24s |\n", + "------------------------", "-----", "----------", "----------", "-------", + "------------------------"); + printf("| %-24s | %5s | %9.4f%% | %9.5f%% | %5.1f | BAM %5d (%6.2f deg) |\n", + "fr_tan_bam (old)", "65u16", + peak_old, sum_old / 65536, ns_old, + peak_bam_old, peak_bam_old * 360.0 / 65536.0); + printf("| %-24s | %5s | %9.4f%% | %9.5f%% | %5.1f | 
BAM %5d (%6.2f deg) |\n", + "fr_tan_bam32_d64 (s/c)", "none", + peak_d64, sum_d64 / 65536, ns_d64, + peak_bam_d64, peak_bam_d64 * 360.0 / 65536.0); + printf("| %-24s | %5s | %9.4f%% | %9.5f%% | %5.1f | BAM %5d (%6.2f deg) |\n", + "fr_tan_bam32 (direct)", "65u32", + peak_dir, sum_dir / 65536, ns_direct, + peak_bam_dir, peak_bam_dir * 360.0 / 65536.0); + + printf("\nOld: 65-entry u16 octant table + reciprocal (div in 2nd octant).\n"); + printf("d64: sin/cos via 129-entry cos table, always s64 div.\n"); + printf("Direct: 65-entry u32 quadrant tan table, lerp with shift, NO div.\n"); + + printf("\nDone.\n"); + return 0; +} diff --git a/tests/test_tan32.c b/tests/test_tan32.c new file mode 100644 index 0000000..ec13184 --- /dev/null +++ b/tests/test_tan32.c @@ -0,0 +1,424 @@ +/* + * test_tan32.c - Head-to-head comparison of LUT32 tan/atan2 vs current impls + * + * Compares: + * fr_tan_bam32() vs fr_tan_bam() — BAM accuracy + speed + * fr_tan32() vs fr_tan() — radian accuracy + * fr_tan_deg32() vs FR_TanI() — integer-degree accuracy + * fr_atan2_32() vs FR_atan2() — accuracy + speed + * + * Compile: + * cc -Isrc -Wall -Os src/FR_tan32.c src/FR_math.c tests/test_tan32.c -lm -o build/test_tan32 + * + * @author M A Chatterjee + */ + +#include +#include +#include +#include +#include "../src/FR_math.h" + +/* Declarations for the new LUT32 functions (in FR_tan32.c) */ +extern s32 fr_tan_bam32(u16 bam); +extern s32 fr_tan32(s32 rad, u16 radix); +extern s32 fr_tan_deg32(s32 deg, u16 radix); +extern s32 fr_atan2_32(s32 y, s32 x, u16 out_radix); + +/*======================================================= + * Helpers + */ +static double fr2d(s32 val, int radix) { + return (double)val / (double)(1L << radix); +} + +/*======================================================= + * Test 1: Tangent accuracy sweep — all 65536 BAM points + */ +static void test_tan_bam_accuracy(void) +{ + double max_err_old = 0.0, max_err_new = 0.0; + double sum_err_old = 0.0, sum_err_new = 0.0; + u16 
max_bam_old = 0, max_bam_new = 0; + int count = 0; + u16 bam; + + printf("## Tangent BAM Accuracy (65536 BAM points)\n\n"); + + for (bam = 0; bam < 0xFFFFu; bam++) { + double angle = (double)bam * 2.0 * M_PI / 65536.0; + double ref = tan(angle); + double old_val, new_val, err_old, err_new; + + /* Skip near poles where tan -> infinity (within ~1 deg of 90/270) */ + if (fabs(ref) > 500.0) continue; + + old_val = fr2d(fr_tan_bam(bam), 16); + new_val = fr2d(fr_tan_bam32(bam), 16); + + /* Percentage error relative to reference */ + if (fabs(ref) > 0.001) { + err_old = fabs((old_val - ref) / ref) * 100.0; + err_new = fabs((new_val - ref) / ref) * 100.0; + } else { + /* Near zero, use absolute error scaled to % of 1.0 */ + err_old = fabs(old_val - ref) * 100.0; + err_new = fabs(new_val - ref) * 100.0; + } + + sum_err_old += err_old; + sum_err_new += err_new; + if (err_old > max_err_old) { max_err_old = err_old; max_bam_old = bam; } + if (err_new > max_err_new) { max_err_new = err_new; max_bam_new = bam; } + count++; + } + + printf("| Metric | Current (fr_tan_bam) | LUT32 (fr_tan_bam32) |\n"); + printf("|----------------|----------------------|----------------------|\n"); + printf("| Peak error (%%) | %11.6f | %11.6f |\n", max_err_old, max_err_new); + printf("| Avg error (%%) | %11.6f | %11.6f |\n", sum_err_old / count, sum_err_new / count); + printf("| Peak BAM | 0x%04X | 0x%04X |\n", max_bam_old, max_bam_new); + printf("| Points tested | %6d | %6d |\n", count, count); + printf("\n"); +} + +/*======================================================= + * Test 2: Tangent radian accuracy — sweep at radix 16 + */ +static void test_tan_radian_accuracy(void) +{ + double max_err_old = 0.0, max_err_new = 0.0; + double sum_err_old = 0.0, sum_err_new = 0.0; + int count = 0; + int i; + + printf("## Tangent Radian Accuracy (10000 points, radix 16)\n\n"); + + /* Sweep radians from -pi to pi in 10000 steps */ + for (i = 0; i < 10000; i++) { + double angle = -M_PI + 2.0 * M_PI * (double)i 
/ 10000.0; + double ref = tan(angle); + s32 rad16 = (s32)(angle * 65536.0); + double old_val, new_val, err_old, err_new; + + if (fabs(ref) > 500.0) continue; + + old_val = fr2d(fr_tan(rad16, 16), 16); + new_val = fr2d(fr_tan32(rad16, 16), 16); + + if (fabs(ref) > 0.001) { + err_old = fabs((old_val - ref) / ref) * 100.0; + err_new = fabs((new_val - ref) / ref) * 100.0; + } else { + err_old = fabs(old_val - ref) * 100.0; + err_new = fabs(new_val - ref) * 100.0; + } + + sum_err_old += err_old; + sum_err_new += err_new; + if (err_old > max_err_old) max_err_old = err_old; + if (err_new > max_err_new) max_err_new = err_new; + count++; + } + + printf("| Metric | Current (fr_tan) | LUT32 (fr_tan32) |\n"); + printf("|----------------|----------------------|----------------------|\n"); + printf("| Peak error (%%) | %11.6f | %11.6f |\n", max_err_old, max_err_new); + printf("| Avg error (%%) | %11.6f | %11.6f |\n", sum_err_old / count, sum_err_new / count); + printf("| Points tested | %6d | %6d |\n", count, count); + printf("\n"); +} + +/*======================================================= + * Test 3: Tangent integer-degree accuracy — 0..359 degrees + */ +static void test_tan_degree_accuracy(void) +{ + double max_err_old = 0.0, max_err_new = 0.0; + double sum_err_old = 0.0, sum_err_new = 0.0; + int count = 0; + int deg; + + printf("## Tangent Integer-Degree Accuracy (360 degrees)\n\n"); + + for (deg = 0; deg < 360; deg++) { + double angle = (double)deg * M_PI / 180.0; + double ref = tan(angle); + double old_val, new_val, err_old, err_new; + + if (fabs(ref) > 500.0) continue; + + old_val = fr2d(FR_TanI((s16)deg), 16); + new_val = fr2d(fr_tan_deg32((s16)deg, 0), 16); + + if (fabs(ref) > 0.001) { + err_old = fabs((old_val - ref) / ref) * 100.0; + err_new = fabs((new_val - ref) / ref) * 100.0; + } else { + err_old = fabs(old_val - ref) * 100.0; + err_new = fabs(new_val - ref) * 100.0; + } + + sum_err_old += err_old; + sum_err_new += err_new; + if (err_old > max_err_old) 
max_err_old = err_old; + if (err_new > max_err_new) max_err_new = err_new; + count++; + } + + printf("| Metric | Current (FR_TanI) | LUT32 (fr_tan_deg32) |\n"); + printf("|----------------|----------------------|----------------------|\n"); + printf("| Peak error (%%) | %11.6f | %11.6f |\n", max_err_old, max_err_new); + printf("| Avg error (%%) | %11.6f | %11.6f |\n", sum_err_old / count, sum_err_new / count); + printf("| Points tested | %6d | %6d |\n", count, count); + printf("\n"); +} + +/*======================================================= + * Test 4: Tangent speed comparison (BAM) + */ +static void test_tan_speed(void) +{ + volatile s32 sink = 0; + clock_t start, end; + double old_ns, new_ns; + int iters = 1000000; + int i; + + printf("## Tangent Speed (%d iterations)\n\n", iters); + + /* Warm up */ + for (i = 0; i < 1000; i++) sink += fr_tan_bam((u16)i); + + start = clock(); + for (i = 0; i < iters; i++) + sink += fr_tan_bam((u16)(i & 0xFFFF)); + end = clock(); + old_ns = (double)(end - start) / CLOCKS_PER_SEC * 1e9 / iters; + + start = clock(); + for (i = 0; i < iters; i++) + sink += fr_tan_bam32((u16)(i & 0xFFFF)); + end = clock(); + new_ns = (double)(end - start) / CLOCKS_PER_SEC * 1e9 / iters; + + printf("| Metric | Current (fr_tan_bam) | LUT32 (fr_tan_bam32) |\n"); + printf("|----------------|----------------------|----------------------|\n"); + printf("| ns/call | %11.1f | %11.1f |\n", old_ns, new_ns); + printf("\n"); + + (void)sink; +} + +/*======================================================= + * Test 5: atan2 accuracy sweep — angles at multiple radii + */ +static void test_atan2_accuracy(void) +{ + double max_err_old = 0.0, max_err_new = 0.0; + double sum_err_old = 0.0, sum_err_new = 0.0; + int count = 0; + int ri, ai; + static const double radii[] = { 0.1, 1.0, 10.0, 100.0, 1000.0 }; + + printf("## atan2 Accuracy Sweep (5 radii x 65536 angles)\n\n"); + + for (ri = 0; ri < 5; ri++) { + double r = radii[ri]; + for (ai = 0; ai < 65536; ai++) { + 
double angle = (double)ai * 2.0 * M_PI / 65536.0 - M_PI; + double fx = r * cos(angle); + double fy = r * sin(angle); + s32 x = (s32)(fx * 65536.0); + s32 y = (s32)(fy * 65536.0); + double ref = atan2(fy, fx); + double old_val, new_val, err_old, err_new; + + /* Skip degenerate */ + if (x == 0 && y == 0) continue; + + old_val = fr2d(FR_atan2(y, x, 16), 16); + new_val = fr2d(fr_atan2_32(y, x, 16), 16); + + /* Absolute error in radians, wrapped to [-pi, pi] */ + err_old = fabs(old_val - ref); + err_new = fabs(new_val - ref); + /* Handle wraparound near +/-pi: difference > pi means we + * crossed the branch cut; true angular error is 2*pi - diff */ + if (err_old > M_PI) err_old = 2.0 * M_PI - err_old; + if (err_new > M_PI) err_new = 2.0 * M_PI - err_new; + /* Convert to % of pi for reporting */ + err_old = err_old / M_PI * 100.0; + err_new = err_new / M_PI * 100.0; + + sum_err_old += err_old; + sum_err_new += err_new; + if (err_old > max_err_old) max_err_old = err_old; + if (err_new > max_err_new) max_err_new = err_new; + count++; + } + } + + printf("| Metric | Current (FR_atan2) | LUT32 (fr_atan2_32) |\n"); + printf("|---------------------|----------------------|----------------------|\n"); + printf("| Peak error (%% of pi)| %11.6f | %11.6f |\n", max_err_old, max_err_new); + printf("| Avg error (%% of pi) | %11.6f | %11.6f |\n", sum_err_old / count, sum_err_new / count); + printf("| Points tested | %6d | %6d |\n", count, count); + printf("\n"); +} + +/*======================================================= + * Test 6: atan2 speed comparison + */ +static void test_atan2_speed(void) +{ + volatile s32 sink = 0; + clock_t start, end; + double old_ns, new_ns; + int iters = 500000; + int i; + + printf("## atan2 Speed (%d iterations)\n\n", iters); + + /* Pre-compute some x,y pairs */ + s32 xs[256], ys[256]; + for (i = 0; i < 256; i++) { + double angle = (double)i * 2.0 * M_PI / 256.0; + xs[i] = (s32)(10.0 * cos(angle) * 65536.0); + ys[i] = (s32)(10.0 * sin(angle) * 65536.0); 
+ } + + /* Warm up */ + for (i = 0; i < 256; i++) sink += FR_atan2(ys[i], xs[i], 16); + + start = clock(); + for (i = 0; i < iters; i++) + sink += FR_atan2(ys[i & 0xFF], xs[i & 0xFF], 16); + end = clock(); + old_ns = (double)(end - start) / CLOCKS_PER_SEC * 1e9 / iters; + + start = clock(); + for (i = 0; i < iters; i++) + sink += fr_atan2_32(ys[i & 0xFF], xs[i & 0xFF], 16); + end = clock(); + new_ns = (double)(end - start) / CLOCKS_PER_SEC * 1e9 / iters; + + printf("| Metric | Current (FR_atan2) | LUT32 (fr_atan2_32) |\n"); + printf("|----------------|----------------------|----------------------|\n"); + printf("| ns/call | %11.1f | %11.1f |\n", old_ns, new_ns); + printf("\n"); + + (void)sink; +} + +/*======================================================= + * Test 7: Quick spot checks for correctness + */ +static int test_spot_checks(void) +{ + int fails = 0; + s32 v; + + printf("## Spot Checks\n\n"); + + /* tan(0) = 0 */ + v = fr_tan_bam32(0); + if (v != 0) { printf(" FAIL: tan_bam32(0) = %d, expected 0\n", v); fails++; } + + /* tan(45 deg) = 1.0 = 65536 in s15.16 */ + v = fr_tan_bam32(0x2000); /* 45 deg = 8192 BAM */ + if (abs(v - 65536) > 2) { printf(" FAIL: tan_bam32(45deg) = %d, expected ~65536\n", v); fails++; } + + /* tan(180 deg) = 0 */ + v = fr_tan_bam32(0x8000); + if (v != 0) { printf(" FAIL: tan_bam32(180deg) = %d, expected 0\n", v); fails++; } + + /* tan(90 deg) = pole */ + v = fr_tan_bam32(0x4000); + if (v != FR_TRIG_MAXVAL) { printf(" FAIL: tan_bam32(90deg) = %d, expected %d\n", v, FR_TRIG_MAXVAL); fails++; } + + /* tan(270 deg) = -pole */ + v = fr_tan_bam32(0xC000); + if (v != -FR_TRIG_MAXVAL) { printf(" FAIL: tan_bam32(270deg) = %d, expected %d\n", v, -FR_TRIG_MAXVAL); fails++; } + + /* Radian wrapper: tan(pi/4) = 1.0 */ + { + s32 pi_4 = (s32)(M_PI / 4.0 * 65536.0); + v = fr_tan32(pi_4, 16); + if (abs(v - 65536) > 100) { printf(" FAIL: tan32(pi/4) = %d (%.6f), expected ~65536\n", v, fr2d(v, 16)); fails++; } + } + + /* Degree wrapper: tan(45) = 1.0 
*/ + v = fr_tan_deg32(45, 0); + if (abs(v - 65536) > 100) { printf(" FAIL: tan_deg32(45) = %d (%.6f), expected ~65536\n", v, fr2d(v, 16)); fails++; } + + /* Degree wrapper: tan(0) = 0 */ + v = fr_tan_deg32(0, 0); + if (v != 0) { printf(" FAIL: tan_deg32(0) = %d, expected 0\n", v); fails++; } + + /* atan2(0, 1) = 0 */ + v = fr_atan2_32(0, 65536, 16); + if (v != 0) { printf(" FAIL: atan2_32(0,1) = %d, expected 0\n", v); fails++; } + + /* atan2(1, 0) = pi/2 */ + { + s32 expected = FR_CHRDX(FR_kQ2RAD, FR_kPREC, 16); + v = fr_atan2_32(65536, 0, 16); + if (abs(v - expected) > 2) { printf(" FAIL: atan2_32(1,0) = %d, expected ~%d\n", v, expected); fails++; } + } + + /* atan2(1, 1) = pi/4 */ + { + double ref = M_PI / 4.0; + s32 expected = (s32)(ref * 65536.0); + v = fr_atan2_32(65536, 65536, 16); + if (abs(v - expected) > 200) { printf(" FAIL: atan2_32(1,1) = %d (%.6f), expected ~%d (%.6f)\n", + v, fr2d(v, 16), expected, ref); fails++; } + } + + /* atan2(-1, -1) = -3*pi/4 */ + { + double ref = -3.0 * M_PI / 4.0; + s32 expected = (s32)(ref * 65536.0); + v = fr_atan2_32(-65536, -65536, 16); + if (abs(v - expected) > 200) { printf(" FAIL: atan2_32(-1,-1) = %d (%.6f), expected ~%d (%.6f)\n", + v, fr2d(v, 16), expected, ref); fails++; } + } + + if (fails == 0) + printf(" All spot checks PASSED\n"); + else + printf(" %d spot check(s) FAILED\n", fails); + printf("\n"); + + return fails; +} + +/*======================================================= + * Main + */ +int main(void) +{ + int fails; + + printf("# FR_tan32 Head-to-Head Comparison Report\n\n"); + + fails = test_spot_checks(); + test_tan_bam_accuracy(); + test_tan_radian_accuracy(); + test_tan_degree_accuracy(); + test_tan_speed(); + test_atan2_accuracy(); + test_atan2_speed(); + + printf("## Summary\n\n"); + printf("Design notes:\n"); + printf(" - tan: sin/cos from the existing 129-entry cosine table (258B, already in ROM)\n"); + printf(" No extra tan table needed for the forward path. 
One s64 division per call.\n"); + printf(" Current uses octant table (130B) + reciprocal division for [45,90] deg.\n\n"); + printf(" - atan2: binary search on 129-entry u32 tan table (516B) + quadrant mapping\n"); + printf(" Current uses hypot_fast8 -> asin/acos chain (more code, no extra table)\n\n"); + printf(" - Tan table (516B) needed only for atan2. Could be omitted if atan2 not used.\n\n"); + + return fails ? 1 : 0; +} diff --git a/tests/test_tan32_peaks.c b/tests/test_tan32_peaks.c new file mode 100644 index 0000000..32e0437 --- /dev/null +++ b/tests/test_tan32_peaks.c @@ -0,0 +1,198 @@ +/* + * test_tan32_peaks.c - Find peak error locations and print ±20 entries around them + */ +#include +#include +#include +#include "../src/FR_math.h" + +extern s32 fr_tan_bam32(u16 bam); +extern s32 fr_tan32(s32 rad, u16 radix); +extern s32 fr_tan_deg32(s32 deg, u16 radix); + +static double fr2d(s32 val, int radix) { + return (double)val / (double)(1L << radix); +} + +static double tan_err(double val, double ref) { + if (fabs(ref) > 0.001) + return fabs((val - ref) / ref) * 100.0; + else + return fabs(val - ref) * 100.0; +} + +/*======================================================= + * BAM peak finder + neighborhood dump + */ +static void peak_tan_bam(void) +{ + s32 bam; + s32 old_peak_bam = 0, new_peak_bam = 0; + double old_peak = 0, new_peak = 0; + + /* Pass 1: find peaks */ + for (bam = 0; bam < 65536; bam++) { + double angle = (double)bam * 2.0 * M_PI / 65536.0; + double ref = tan(angle); + if (fabs(ref) > 500.0) continue; + double ov = fr2d(fr_tan_bam((u16)bam), 16); + double nv = fr2d(fr_tan_bam32((u16)bam), 16); + double oe = tan_err(ov, ref); + double ne = tan_err(nv, ref); + if (oe > old_peak) { old_peak = oe; old_peak_bam = bam; } + if (ne > new_peak) { new_peak = ne; new_peak_bam = bam; } + } + + printf("## tan BAM: OLD peak at BAM %d (%.4f deg), NEW peak at BAM %d (%.4f deg)\n\n", + (int)old_peak_bam, old_peak_bam * 360.0 / 65536.0, + (int)new_peak_bam, 
new_peak_bam * 360.0 / 65536.0); + + /* Pass 2: dump ±20 around OLD peak */ + printf("### OLD peak neighborhood (BAM %d ± 20)\n\n", (int)old_peak_bam); + printf("| BAM | deg | ref (libm) | OLD result | OLD err %% | NEW result | NEW err %% |\n"); + printf("|-------|-----------|----------------|----------------|-------------|----------------|-------------|\n"); + for (bam = old_peak_bam - 20; bam <= old_peak_bam + 20; bam++) { + u16 b = (u16)(bam & 0xFFFF); + double angle = (double)b * 2.0 * M_PI / 65536.0; + double ref = tan(angle); + if (fabs(ref) > 500.0) { printf("| %5d | %9.4f | (pole) | | | | |\n", bam, b * 360.0 / 65536.0); continue; } + double ov = fr2d(fr_tan_bam(b), 16); + double nv = fr2d(fr_tan_bam32(b), 16); + printf("| %5d | %9.4f | %14.8f | %14.8f | %11.6f | %14.8f | %11.6f |%s\n", + bam, b * 360.0 / 65536.0, ref, ov, tan_err(ov, ref), nv, tan_err(nv, ref), + (bam == old_peak_bam) ? " <-- OLD PEAK" : (bam == new_peak_bam) ? " <-- NEW PEAK" : ""); + } + + if (abs((int)(new_peak_bam - old_peak_bam)) > 25) { + printf("\n### NEW peak neighborhood (BAM %d ± 20)\n\n", (int)new_peak_bam); + printf("| BAM | deg | ref (libm) | OLD result | OLD err %% | NEW result | NEW err %% |\n"); + printf("|-------|-----------|----------------|----------------|-------------|----------------|-------------|\n"); + for (bam = new_peak_bam - 20; bam <= new_peak_bam + 20; bam++) { + u16 b = (u16)(bam & 0xFFFF); + double angle = (double)b * 2.0 * M_PI / 65536.0; + double ref = tan(angle); + if (fabs(ref) > 500.0) { printf("| %5d | %9.4f | (pole) | | | | |\n", bam, b * 360.0 / 65536.0); continue; } + double ov = fr2d(fr_tan_bam(b), 16); + double nv = fr2d(fr_tan_bam32(b), 16); + printf("| %5d | %9.4f | %14.8f | %14.8f | %11.6f | %14.8f | %11.6f |%s\n", + bam, b * 360.0 / 65536.0, ref, ov, tan_err(ov, ref), nv, tan_err(nv, ref), + (bam == new_peak_bam) ? 
" <-- NEW PEAK" : ""); + } + } + + printf("\n"); +} + +/*======================================================= + * Radian peak finder + neighborhood dump + */ +static void peak_tan_rad(void) +{ + s32 rad16; + s32 old_peak_r = 0, new_peak_r = 0; + double old_peak = 0, new_peak = 0; + + for (rad16 = -65536; rad16 <= 65535; rad16++) { + double angle = (double)rad16 / 65536.0; + double ref = tan(angle); + if (fabs(ref) > 500.0) continue; + double ov = fr2d(fr_tan(rad16, 16), 16); + double nv = fr2d(fr_tan32(rad16, 16), 16); + double oe = tan_err(ov, ref); + double ne = tan_err(nv, ref); + if (oe > old_peak) { old_peak = oe; old_peak_r = rad16; } + if (ne > new_peak) { new_peak = ne; new_peak_r = rad16; } + } + + printf("## tan Radian: OLD peak at r16=%d (%.6f rad, %.4f deg), NEW peak at r16=%d (%.6f rad, %.4f deg)\n\n", + (int)old_peak_r, old_peak_r / 65536.0, old_peak_r / 65536.0 * 180.0 / M_PI, + (int)new_peak_r, new_peak_r / 65536.0, new_peak_r / 65536.0 * 180.0 / M_PI); + + /* dump around OLD peak */ + printf("### OLD peak neighborhood (r16=%d ± 20)\n\n", (int)old_peak_r); + printf("| r16 | rad | deg | ref (libm) | OLD result | OLD err %% | NEW result | NEW err %% |\n"); + printf("|--------|-------------|-----------|----------------|----------------|-------------|----------------|-------------|\n"); + for (rad16 = old_peak_r - 20; rad16 <= old_peak_r + 20; rad16++) { + double angle = (double)rad16 / 65536.0; + double ref = tan(angle); + if (fabs(ref) > 500.0) continue; + double ov = fr2d(fr_tan(rad16, 16), 16); + double nv = fr2d(fr_tan32(rad16, 16), 16); + printf("| %6d | %11.7f | %9.4f | %14.8f | %14.8f | %11.6f | %14.8f | %11.6f |%s\n", + (int)rad16, angle, angle * 180.0 / M_PI, ref, ov, tan_err(ov, ref), nv, tan_err(nv, ref), + (rad16 == old_peak_r) ? " <-- OLD PEAK" : (rad16 == new_peak_r) ? 
" <-- NEW PEAK" : ""); + } + + if (abs((int)(new_peak_r - old_peak_r)) > 25) { + printf("\n### NEW peak neighborhood (r16=%d ± 20)\n\n", (int)new_peak_r); + printf("| r16 | rad | deg | ref (libm) | OLD result | OLD err %% | NEW result | NEW err %% |\n"); + printf("|--------|-------------|-----------|----------------|----------------|-------------|----------------|-------------|\n"); + for (rad16 = new_peak_r - 20; rad16 <= new_peak_r + 20; rad16++) { + double angle = (double)rad16 / 65536.0; + double ref = tan(angle); + if (fabs(ref) > 500.0) continue; + double ov = fr2d(fr_tan(rad16, 16), 16); + double nv = fr2d(fr_tan32(rad16, 16), 16); + printf("| %6d | %11.7f | %9.4f | %14.8f | %14.8f | %11.6f | %14.8f | %11.6f |%s\n", + (int)rad16, angle, angle * 180.0 / M_PI, ref, ov, tan_err(ov, ref), nv, tan_err(nv, ref), + (rad16 == new_peak_r) ? " <-- NEW PEAK" : ""); + } + } + + printf("\n"); +} + +/*======================================================= + * Degree peak finder + neighborhood dump + */ +static void peak_tan_deg(void) +{ + s16 deg; + s16 old_peak_d = 0, new_peak_d = 0; + double old_peak = 0, new_peak = 0; + + for (deg = -180; deg <= 179; deg++) { + double ref = tan((double)deg * M_PI / 180.0); + if (fabs(ref) > 500.0) continue; + double ov = fr2d(FR_TanI(deg), 16); + double nv = fr2d(fr_tan_deg32(deg, 0), 16); + double oe = tan_err(ov, ref); + double ne = tan_err(nv, ref); + if (oe > old_peak) { old_peak = oe; old_peak_d = deg; } + if (ne > new_peak) { new_peak = ne; new_peak_d = deg; } + } + + printf("## tan Degree: OLD peak at %d deg, NEW peak at %d deg\n\n", + (int)old_peak_d, (int)new_peak_d); + + /* dump full range around both peaks, ±20 deg */ + s16 lo = old_peak_d < new_peak_d ? old_peak_d : new_peak_d; + s16 hi = old_peak_d > new_peak_d ? old_peak_d : new_peak_d; + lo = (lo - 20 < -180) ? -180 : lo - 20; + hi = (hi + 20 > 179) ? 179 : hi + 20; + + printf("### Neighborhood (%d .. 
%d deg)\n\n", (int)lo, (int)hi); + printf("| deg | ref (libm) | OLD result | OLD err %% | NEW result | NEW err %% |\n"); + printf("|------|----------------|----------------|-------------|----------------|-------------|\n"); + for (deg = lo; deg <= hi; deg++) { + double ref = tan((double)deg * M_PI / 180.0); + if (fabs(ref) > 500.0) { printf("| %4d | (pole) | | | | |\n", (int)deg); continue; } + double ov = fr2d(FR_TanI(deg), 16); + double nv = fr2d(fr_tan_deg32(deg, 0), 16); + printf("| %4d | %14.8f | %14.8f | %11.6f | %14.8f | %11.6f |%s\n", + (int)deg, ref, ov, tan_err(ov, ref), nv, tan_err(nv, ref), + (deg == old_peak_d && deg == new_peak_d) ? " <-- BOTH PEAK" : + (deg == old_peak_d) ? " <-- OLD PEAK" : + (deg == new_peak_d) ? " <-- NEW PEAK" : ""); + } + printf("\n"); +} + +int main(void) +{ + printf("# Peak Error Neighborhoods for Tangent Functions\n\n"); + peak_tan_bam(); + peak_tan_rad(); + peak_tan_deg(); + return 0; +} diff --git a/tests/test_tan32_sweep.c b/tests/test_tan32_sweep.c new file mode 100644 index 0000000..99dc83f --- /dev/null +++ b/tests/test_tan32_sweep.c @@ -0,0 +1,318 @@ +/* + * test_tan32_sweep.c - Comprehensive -65536..+65536 sweep for all tan/atan functions + * + * Generates a single comparison table: old vs new, BAM / radian / degree, + * with peak error, avg error, and speed for each function. 
+ * + * Compile: + * cc -Isrc -Wall -Os src/FR_tan32.c src/FR_math.c tests/test_tan32_sweep.c -lm -o build/test_tan32_sweep + * + * @author M A Chatterjee + */ + +#include +#include +#include +#include +#include "../src/FR_math.h" + +extern s32 fr_tan_bam32(u16 bam); +extern s32 fr_tan32(s32 rad, u16 radix); +extern s32 fr_tan_deg32(s32 deg, u16 radix); +extern s32 fr_atan2_32(s32 y, s32 x, u16 out_radix); + +static double fr2d(s32 val, int radix) { + return (double)val / (double)(1L << radix); +} + +typedef struct { + double peak_err; + double sum_err; + int count; +} stats_t; + +static void stats_init(stats_t *s) { s->peak_err = 0; s->sum_err = 0; s->count = 0; } +static void stats_add(stats_t *s, double err) { + if (err > s->peak_err) s->peak_err = err; + s->sum_err += err; + s->count++; +} +static double stats_avg(stats_t *s) { return s->count > 0 ? s->sum_err / s->count : 0; } + +/*======================================================= + * Speed measurement helper + */ +static double measure_ns(void (*fn)(volatile s32 *sink, int n), int n) { + volatile s32 sink = 0; + clock_t start, end; + /* warm up */ + fn(&sink, n / 10); + start = clock(); + fn(&sink, n); + end = clock(); + return (double)(end - start) / CLOCKS_PER_SEC * 1e9 / n; +} + +/* Speed test functions */ +static void speed_tan_bam_old(volatile s32 *sink, int n) { + int i; for (i = 0; i < n; i++) *sink += fr_tan_bam((u16)(i & 0xFFFF)); +} +static void speed_tan_bam_new(volatile s32 *sink, int n) { + int i; for (i = 0; i < n; i++) *sink += fr_tan_bam32((u16)(i & 0xFFFF)); +} +static void speed_tan_rad_old(volatile s32 *sink, int n) { + int i; for (i = 0; i < n; i++) *sink += fr_tan((s32)((i * 7) - n * 3), 16); +} +static void speed_tan_rad_new(volatile s32 *sink, int n) { + int i; for (i = 0; i < n; i++) *sink += fr_tan32((s32)((i * 7) - n * 3), 16); +} +static void speed_tan_deg_old(volatile s32 *sink, int n) { + int i; for (i = 0; i < n; i++) *sink += FR_TanI((s16)(i % 360)); +} +static void 
speed_tan_deg_new(volatile s32 *sink, int n) { + int i; for (i = 0; i < n; i++) *sink += fr_tan_deg32((s16)(i % 360), 0); +} + +static s32 g_xs[256], g_ys[256]; +static void init_atan_data(void) { + int i; + for (i = 0; i < 256; i++) { + double a = (double)i * 2.0 * M_PI / 256.0; + g_xs[i] = (s32)(10.0 * cos(a) * 65536.0); + g_ys[i] = (s32)(10.0 * sin(a) * 65536.0); + } +} +static void speed_atan2_old(volatile s32 *sink, int n) { + int i; for (i = 0; i < n; i++) *sink += FR_atan2(g_ys[i & 0xFF], g_xs[i & 0xFF], 16); +} +static void speed_atan2_new(volatile s32 *sink, int n) { + int i; for (i = 0; i < n; i++) *sink += fr_atan2_32(g_ys[i & 0xFF], g_xs[i & 0xFF], 16); +} +static void speed_atan_old(volatile s32 *sink, int n) { + int i; for (i = 0; i < n; i++) *sink += FR_atan((s32)((i * 13) - n * 6), 16, 16); +} +static void speed_atan_new(volatile s32 *sink, int n) { + /* FR_atan(x, r, or) = FR_atan2(x, 1< 500.0) continue; + + ov = fr2d(fr_tan_bam((u16)bam), 16); + nv = fr2d(fr_tan_bam32((u16)bam), 16); + + if (fabs(ref) > 0.001) { + oe = fabs((ov - ref) / ref) * 100.0; + ne = fabs((nv - ref) / ref) * 100.0; + } else { + oe = fabs(ov - ref) * 100.0; + ne = fabs(nv - ref) * 100.0; + } + stats_add(old_s, oe); + stats_add(new_s, ne); + } +} + +static void sweep_tan_rad(stats_t *old_s, stats_t *new_s) +{ + s32 rad16; + stats_init(old_s); + stats_init(new_s); + + /* Sweep s15.16 radians from -65536 to +65535 (= -1.0 to +1.0 rad ≈ ±57 deg). + * Step by 1 LSB = full 131072-point sweep. 
*/ + for (rad16 = -65536; rad16 <= 65535; rad16++) { + double angle = (double)rad16 / 65536.0; + double ref = tan(angle); + double ov, nv, oe, ne; + if (fabs(ref) > 500.0) continue; + + ov = fr2d(fr_tan(rad16, 16), 16); + nv = fr2d(fr_tan32(rad16, 16), 16); + + if (fabs(ref) > 0.001) { + oe = fabs((ov - ref) / ref) * 100.0; + ne = fabs((nv - ref) / ref) * 100.0; + } else { + oe = fabs(ov - ref) * 100.0; + ne = fabs(nv - ref) * 100.0; + } + stats_add(old_s, oe); + stats_add(new_s, ne); + } +} + +static void sweep_tan_deg(stats_t *old_s, stats_t *new_s) +{ + s16 deg; + stats_init(old_s); + stats_init(new_s); + + for (deg = -180; deg <= 179; deg++) { + double ref = tan((double)deg * M_PI / 180.0); + double ov, nv, oe, ne; + if (fabs(ref) > 500.0) continue; + + ov = fr2d(FR_TanI(deg), 16); + nv = fr2d(fr_tan_deg32(deg, 0), 16); + + if (fabs(ref) > 0.001) { + oe = fabs((ov - ref) / ref) * 100.0; + ne = fabs((nv - ref) / ref) * 100.0; + } else { + oe = fabs(ov - ref) * 100.0; + ne = fabs(nv - ref) * 100.0; + } + stats_add(old_s, oe); + stats_add(new_s, ne); + } +} + +/*======================================================= + * Atan sweeps + */ +static void sweep_atan2(stats_t *old_s, stats_t *new_s) +{ + int ri, ai; + static const double radii[] = { 0.1, 1.0, 10.0, 100.0, 1000.0 }; + stats_init(old_s); + stats_init(new_s); + + for (ri = 0; ri < 5; ri++) { + double r = radii[ri]; + for (ai = 0; ai < 65536; ai++) { + double angle = (double)ai * 2.0 * M_PI / 65536.0 - M_PI; + double fx = r * cos(angle), fy = r * sin(angle); + s32 x = (s32)(fx * 65536.0), y = (s32)(fy * 65536.0); + double ref = atan2(fy, fx); + double ov, nv, oe, ne; + if (x == 0 && y == 0) continue; + + ov = fr2d(FR_atan2(y, x, 16), 16); + nv = fr2d(fr_atan2_32(y, x, 16), 16); + + oe = fabs(ov - ref); ne = fabs(nv - ref); + if (oe > M_PI) oe = 2.0 * M_PI - oe; + if (ne > M_PI) ne = 2.0 * M_PI - ne; + oe = oe / M_PI * 100.0; + ne = ne / M_PI * 100.0; + + stats_add(old_s, oe); + stats_add(new_s, ne); + } + } 
+} + +static void sweep_atan(stats_t *old_s, stats_t *new_s) +{ + s32 x16; + stats_init(old_s); + stats_init(new_s); + + /* Sweep atan input from -65536 to +65535 (= -1.0 to +1.0 in s15.16). + * Step by 8 to keep runtime reasonable (16384 points). + * Error metric: absolute angular error as % of pi/2 (atan range). */ + for (x16 = -65536; x16 <= 65535; x16 += 8) { + double xf = (double)x16 / 65536.0; + double ref = atan(xf); + double ov, nv, oe, ne; + + ov = fr2d(FR_atan(x16, 16, 16), 16); + nv = fr2d(fr_atan2_32(x16, 65536, 16), 16); + + /* Use absolute angular error / (pi/2) * 100, same approach as atan2 */ + oe = fabs(ov - ref) / (M_PI / 2.0) * 100.0; + ne = fabs(nv - ref) / (M_PI / 2.0) * 100.0; + + stats_add(old_s, oe); + stats_add(new_s, ne); + } +} + +/*======================================================= + * Main + */ +int main(void) +{ + stats_t old_s, new_s; + double old_ns, new_ns; + int N = 1000000; + + init_atan_data(); + + printf("# Comprehensive Function Comparison: Old vs New\n\n"); + printf("Sweep range: full domain for each input type\n"); + printf("Error metric: relative %% (or absolute*100 near zero)\n"); + printf("Speed: ns/call on this platform\n\n"); + + printf("## Tangent Functions\n\n"); + printf("| Function | Impl | Sweep Range | Points | Peak Err %% | Avg Err %% | ns/call |\n"); + printf("|--------------------|-------|-------------------|---------|-------------|-------------|--------:|\n"); + + sweep_tan_bam(&old_s, &new_s); + old_ns = measure_ns(speed_tan_bam_old, N); + new_ns = measure_ns(speed_tan_bam_new, N); + printf("| tan_bam (BAM) | OLD | 0..65535 BAM | %7d | %11.6f | %11.6f | %5.1f |\n", + old_s.count, old_s.peak_err, stats_avg(&old_s), old_ns); + printf("| tan_bam32 (BAM) | NEW | 0..65535 BAM | %7d | %11.6f | %11.6f | %5.1f |\n", + new_s.count, new_s.peak_err, stats_avg(&new_s), new_ns); + + sweep_tan_rad(&old_s, &new_s); + old_ns = measure_ns(speed_tan_rad_old, N); + new_ns = measure_ns(speed_tan_rad_new, N); + printf("| 
fr_tan (rad@r16) | OLD | -65536..+65535 r16| %7d | %11.6f | %11.6f | %5.1f |\n", + old_s.count, old_s.peak_err, stats_avg(&old_s), old_ns); + printf("| fr_tan32 (rad@r16) | NEW | -65536..+65535 r16| %7d | %11.6f | %11.6f | %5.1f |\n", + new_s.count, new_s.peak_err, stats_avg(&new_s), new_ns); + + sweep_tan_deg(&old_s, &new_s); + old_ns = measure_ns(speed_tan_deg_old, N); + new_ns = measure_ns(speed_tan_deg_new, N); + printf("| FR_TanI (deg) | OLD | -180..+179 deg | %7d | %11.6f | %11.6f | %5.1f |\n", + old_s.count, old_s.peak_err, stats_avg(&old_s), old_ns); + printf("| fr_tan_deg32 (deg) | NEW | -180..+179 deg | %7d | %11.6f | %11.6f | %5.1f |\n", + new_s.count, new_s.peak_err, stats_avg(&new_s), new_ns); + + printf("\n## Inverse Tangent Functions\n\n"); + printf("| Function | Impl | Sweep Range | Points | Peak Err %% | Avg Err %% | ns/call |\n"); + printf("|--------------------|-------|-------------------|---------|-------------|-------------|--------:|\n"); + + sweep_atan2(&old_s, &new_s); + old_ns = measure_ns(speed_atan2_old, N / 2); + new_ns = measure_ns(speed_atan2_new, N / 2); + printf("| FR_atan2 (s15.16) | OLD | 5 radii x 65536 | %7d | %11.6f | %11.6f | %5.1f |\n", + old_s.count, old_s.peak_err, stats_avg(&old_s), old_ns); + printf("| fr_atan2_32(s15.16)| NEW | 5 radii x 65536 | %7d | %11.6f | %11.6f | %5.1f |\n", + new_s.count, new_s.peak_err, stats_avg(&new_s), new_ns); + + sweep_atan(&old_s, &new_s); + old_ns = measure_ns(speed_atan_old, N / 2); + new_ns = measure_ns(speed_atan_new, N / 2); + printf("| FR_atan (s15.16) | OLD | -65536..+65535 /8 | %7d | %11.6f | %11.6f | %5.1f |\n", + old_s.count, old_s.peak_err, stats_avg(&old_s), old_ns); + printf("| atan2_32(x,1) eq. 
| NEW | -65536..+65535 /8 | %7d | %11.6f | %11.6f | %5.1f |\n", + new_s.count, new_s.peak_err, stats_avg(&new_s), new_ns); + + printf("\n## Notes\n\n"); + printf("- BAM sweep: 0..65535 (full circle, excludes |tan|>500 near poles)\n"); + printf("- Radian sweep: -65536..+65535 at radix 16 = -1.0..+1.0 rad = +/-57.3 deg\n"); + printf("- Degree sweep: -180..+179 integer degrees\n"); + printf("- atan2 error: %% of pi (angular error / pi * 100)\n"); + printf("- atan error: absolute angular error / (pi/2) * 100%%\n"); + printf("- atan2_32(x,1) is used as the NEW atan since it's equivalent to atan(x)\n"); + + return 0; +} diff --git a/tests/test_tdd.cpp b/tests/test_tdd.cpp index 5a70a0a..3daaff9 100644 --- a/tests/test_tdd.cpp +++ b/tests/test_tdd.cpp @@ -89,8 +89,7 @@ static void stats_add(stats_t *s, double in, double actual, double expected) { s->worst_expected = expected; } s->sum_abs_err += e; - /* Skip percent error when expected ≈ 0 to avoid division artifacts */ - double pct = (fabs(expected) > 0.01) ? (e / fabs(expected)) * 100.0 : 0.0; + double pct = (expected != 0.0) ? (e / fabs(expected)) * 100.0 : (e != 0.0 ? 100.0 : 0.0); if (pct > s->max_pct_err) { s->max_pct_err = pct; s->worst_pct_input = in; @@ -109,6 +108,21 @@ static double stats_mean_pct(const stats_t *s) { return s->n ? s->sum_pct_err / s->n : 0.0; } +/* Quantize a double to s15.16 resolution (same grid as library output). */ +static inline double q16(double x) { + return floor(x * 65536.0 + 0.5) / 65536.0; +} + +/* Reference value for tan: libm tan() clamped to ±maxint as s15.16 double. 
*/ +static const double TAN_CLAMP = (double)0x7fffffff / (double)(1L << 16); + +static double tan_ref(double rad) { + double t = tan(rad); + if (t > TAN_CLAMP) return TAN_CLAMP; + if (t < -TAN_CLAMP) return -TAN_CLAMP; + return t; +} + /* Set by FR_SHOWPEAK env var — adds a "Peak at" column to the accuracy table */ static int g_showpeak = 0; @@ -1725,10 +1739,11 @@ static void section_summary(void) { printf("| FR_FixMulSat | OK | 4.2, 4.3 | int64 fast path with round-to-nearest and explicit saturation |\n"); printf("| FR_FixAddSat | OK | 4.4, 4.5 | Saturation behaves identically on LP64 host and ILP32 MCU |\n"); printf("| FR_CosI / FR_SinI | OK | 5 | s15.16 output; exact at poles; max abs error ~1.5e-5 (1 LSB s15.16) over [-720, +720]; macros routing to fr_*_bam |\n"); - printf("| FR_TanI (integer degrees) | OK | 5.1, 5.2 | Routed through BAM trig |\n"); + printf("| FR_TanI (integer degrees) | OK | 5.1, 5.2 | BAM table lookup; 65-entry octant table; no 64-bit division |\n"); printf("| FR_Cos / FR_Sin (interpolated) | OK | 6.1 | Within LSB-level error for r8 inputs in s16 |\n"); - printf("| FR_Tan (interpolated) | OK | 6.2 | Locals are s32 |\n"); + printf("| FR_Tan (interpolated) | OK | 6.2 | Via fr_tan_bam; 65-entry octant table |\n"); printf("| fr_cos / fr_sin / fr_cos_bam / fr_sin_bam / fr_cos_deg / fr_sin_deg | OK | 6 | s15.16 output; 129-entry quadrant table with round-to-nearest linear interp; exact at cardinal angles |\n"); + printf("| fr_tan_bam | OK | 14 | 65-entry octant table; first-octant lerp, second-octant 32-bit reciprocal; no 64-bit |\n"); printf("| FR_acos | OK | 7.1 | Max error ~0.83° over [-1, +1] swept at 200 points |\n"); printf("| FR_asin | OK | 7.2 | Same precision as FR_acos |\n"); printf("| FR_atan2 | OK | 7.3 | Via asin/acos + hypot_fast8; 129-entry cos table; `FR_atan2(y, x, out_radix)` returns radians |\n"); @@ -1796,51 +1811,98 @@ static void section_accuracy_table(void) { /* Persistent stats so we can print diagnostics after the table 
*/ stats_t st_sincos, st_tan, st_asincos, st_atan2; + stats_t st_rad2bam, st_deg2bam, st_sincos_deg_s32, st_tan_deg_s32; stats_reset(&st_sincos); stats_reset(&st_tan); stats_reset(&st_asincos); stats_reset(&st_atan2); + stats_reset(&st_rad2bam); stats_reset(&st_deg2bam); + stats_reset(&st_sincos_deg_s32); stats_reset(&st_tan_deg_s32); + + /* --- sin / cos (BAM native: 65536-pt) --- */ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 65536; i++) { + u16 bam = (u16)i; + double rad = bam * 2.0 * M_PI / 65536.0; + stats_add(&st, (double)bam, frd(fr_sin_bam(bam), FR_TRIG_OUT_PREC), q16(sin(rad))); + stats_add(&st, (double)bam, frd(fr_cos_bam(bam), FR_TRIG_OUT_PREC), q16(cos(rad))); + } + acc_row("sin/cos (BAM)", &st, "fr_sin_bam/fr_cos_bam direct; 129-entry table"); + } - /* --- sin / cos --- */ + /* --- sin / cos (degree wrappers: 65536-pt) --- */ { stats_t &st = st_sincos; const u16 radix = 7; /* s8.7 degrees: 128 steps/deg, [-256°,+256°) */ - /* 65536-point sweep: all s16 values at radix 7 cover > full circle */ for (int i = -32768; i <= 32767; i++) { double deg = (double)i / (1 << radix); double rad = deg * M_PI / 180.0; - stats_add(&st, deg, frd(FR_Sin((s16)i, radix), FR_TRIG_OUT_PREC), sin(rad)); - stats_add(&st, deg, frd(FR_Cos((s16)i, radix), FR_TRIG_OUT_PREC), cos(rad)); + stats_add(&st, deg, frd(FR_Sin((s16)i, radix), FR_TRIG_OUT_PREC), q16(sin(rad))); + stats_add(&st, deg, frd(FR_Cos((s16)i, radix), FR_TRIG_OUT_PREC), q16(cos(rad))); } - /* Special cases: exact integer degrees including negative */ s16 specials[] = {0,30,45,60,90,120,135,150,180,210,225,240,270,300,315,330,360, -30,-45,-60,-90,-120,-135,-150,-180,-210,-225,-240,-270,-300,-315,-330,-360}; for (int si = 0; si < (int)(sizeof(specials)/sizeof(specials[0])); si++) { s16 d = specials[si]; double rad = d * M_PI / 180.0; - stats_add(&st, d, frd(FR_SinI(d), FR_TRIG_OUT_PREC), sin(rad)); - stats_add(&st, d, frd(FR_CosI(d), FR_TRIG_OUT_PREC), cos(rad)); + stats_add(&st, d, frd(FR_SinI(d), 
FR_TRIG_OUT_PREC), q16(sin(rad))); + stats_add(&st, d, frd(FR_CosI(d), FR_TRIG_OUT_PREC), q16(cos(rad))); + } + acc_row("sin/cos (deg)", &st, "FR_Sin/FR_Cos ±256° (s16 at radix 7; FR_DEG2BAM)"); + } + + /* --- sin / cos (radian wrappers: 65536-pt) --- */ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 65536; i++) { + double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0); + s32 rad_fp = (s32)(angle * (1L << 16)); + stats_add(&st, angle, frd(fr_sin(rad_fp, 16), FR_TRIG_OUT_PREC), q16(sin(angle))); + stats_add(&st, angle, frd(fr_cos(rad_fp, 16), FR_TRIG_OUT_PREC), q16(cos(angle))); + } + acc_row("sin/cos (rad)", &st, "fr_sin/fr_cos via fr_rad_to_bam ±2π r16"); + } + + /* --- tan (BAM native: 65536-pt, full sweep) --- */ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 65536; i++) { + u16 bam = (u16)i; + double ref; + if (bam == 16384) ref = TAN_CLAMP; /* 90°: +maxint */ + else if (bam == 49152) ref = -TAN_CLAMP; /* 270°: -maxint */ + else ref = tan_ref(bam * 2.0 * M_PI / 65536.0); + stats_add(&st, (double)bam, frd(fr_tan_bam(bam), FR_TRIG_OUT_PREC), q16(ref)); } - acc_row("sin / cos", &st, "65536-pt sweep + specials"); + acc_row("tan (BAM)", &st, "fr_tan_bam 65536-pt full; ±maxint at poles"); } - /* --- tan --- */ + /* --- tan (degree wrappers: 65536-pt, full sweep) --- */ { stats_t &st = st_tan; const u16 radix = 7; for (int i = -32768; i <= 32767; i++) { double deg = (double)i / (1 << radix); double rad = deg * M_PI / 180.0; - /* Skip near poles: |cos| < 0.01 → tan > 100 */ - if (fabs(cos(rad)) < 0.01) continue; - stats_add(&st, deg, frd(FR_Tan((s16)i, radix), FR_TRIG_OUT_PREC), tan(rad)); + stats_add(&st, deg, frd(FR_Tan((s16)i, radix), FR_TRIG_OUT_PREC), q16(tan_ref(rad))); } - /* Special cases: integer degrees (avoiding poles) */ s16 specials[] = {0,30,45,60,-30,-45,-60,120,135,150,-120,-135,-150}; for (int si = 0; si < (int)(sizeof(specials)/sizeof(specials[0])); si++) { s16 d = specials[si]; double rad = d * M_PI / 180.0; - stats_add(&st, 
d, frd(FR_TanI(d), FR_TRIG_OUT_PREC), tan(rad)); + stats_add(&st, d, frd(FR_TanI(d), FR_TRIG_OUT_PREC), q16(tan_ref(rad))); } - acc_row("tan", &st, "65536-pt sweep (skip poles)"); + acc_row("tan (deg)", &st, "FR_Tan ±256° full (s16 at radix 7; FR_DEG2BAM); sat at poles"); + } + + /* --- tan (radian wrappers: 65536-pt, full sweep) --- */ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 65536; i++) { + double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0); + s32 rad_fp = (s32)(angle * (1L << 16)); + stats_add(&st, angle, frd(fr_tan(rad_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(angle))); + } + acc_row("tan (rad)", &st, "fr_tan ±2π r16 full; sat at poles"); } /* --- asin / acos --- */ @@ -1851,9 +1913,9 @@ static void section_accuracy_table(void) { double xd = (double)i / (1 << 15); if (xd < -1.0 || xd > 1.0) continue; s32 rad = FR_asin((s32)i, 15, R); - stats_add(&st, xd, frd(rad, R), asin(xd)); + stats_add(&st, xd, frd(rad, R), q16(asin(xd))); rad = FR_acos((s32)i, 15, R); - stats_add(&st, xd, frd(rad, R), acos(xd)); + stats_add(&st, xd, frd(rad, R), q16(acos(xd))); } acc_row("asin / acos", &st, "65536-pt; sqrt approx near boundary"); } @@ -1886,7 +1948,7 @@ static void section_accuracy_table(void) { /* Skip near ±pi branch cut: sign depends on sub-LSB * input quantization, not algorithm accuracy. 
*/ if (fabs(fabs(ref) - M_PI) < 0.01) continue; - stats_add(&st, angle * 180.0 / M_PI, frd(r, R), ref); + stats_add(&st, angle * 180.0 / M_PI, frd(r, R), q16(ref)); } } /* Special cases: exact quadrant/octant/30-degree angles */ @@ -1898,7 +1960,7 @@ static void section_accuracy_table(void) { s32 fx = (s32)(x * scale), fy = (s32)(y * scale); if (fx == 0 && fy == 0) continue; s32 r = FR_atan2(fy, fx, R); - stats_add(&st, specials_deg[si], frd(r, R), atan2(y, x)); + stats_add(&st, specials_deg[si], frd(r, R), q16(atan2(y, x))); } acc_row("atan2", &st, "65536x5 radii; asin/acos+hypot_fast8"); } @@ -1906,18 +1968,14 @@ static void section_accuracy_table(void) { /* --- atan --- */ { stats_t st; stats_reset(&st); - /* Sweep atan(x) for x in [-10, 10] with fine steps near zero. - * FR_atan(input, radix, out_radix) calls FR_atan2(input, 1< 32000.0 || ref < 1e-6) continue; /* skip overflow/underflow */ - stats_add(&st, x, frd(r, R), ref); + stats_add(&st, x, frd(r, R), q16(ref)); } acc_row("exp", &st, "FR_MULK28 + FR_pow2"); } @@ -2013,7 +2071,7 @@ static void section_accuracy_table(void) { s32 r = FR_EXP_FAST(fr, R); double ref = exp(x); if (ref > 32000.0 || ref < 1e-6) continue; - stats_add(&st, x, frd(r, R), ref); + stats_add(&st, x, frd(r, R), q16(ref)); } acc_row("exp_fast", &st, "Shift-only scaling"); } @@ -2027,7 +2085,7 @@ static void section_accuracy_table(void) { s32 r = FR_POW10(fr, R); double ref = pow(10.0, x); if (ref > 32000.0 || ref < 1e-6) continue; - stats_add(&st, x, frd(r, R), ref); + stats_add(&st, x, frd(r, R), q16(ref)); } acc_row("pow10", &st, "FR_MULK28 + FR_pow2"); } @@ -2041,7 +2099,7 @@ static void section_accuracy_table(void) { s32 r = FR_POW10_FAST(fr, R); double ref = pow(10.0, x); if (ref > 32000.0 || ref < 1e-6) continue; - stats_add(&st, x, frd(r, R), ref); + stats_add(&st, x, frd(r, R), q16(ref)); } acc_row("pow10_fast", &st, "Shift-only scaling"); } @@ -2058,7 +2116,7 @@ static void section_accuracy_table(void) { s32 fy = (s32)(cases[i].y 
* scale); s32 r = FR_hypot(fx, fy, R); double ref = hypot(cases[i].x, cases[i].y); - stats_add(&st, ref, frd(r, R), ref); + stats_add(&st, ref, frd(r, R), q16(ref)); } acc_row("hypot (exact)", &st, "64-bit intermediate"); } @@ -2075,7 +2133,7 @@ static void section_accuracy_table(void) { s32 fy = (s32)(cases[i].y * scale); s32 r = FR_hypot_fast8(fx, fy); double ref = hypot(cases[i].x, cases[i].y); - if (ref > 0) stats_add(&st, ref, frd(r, R), ref); + if (ref > 0) stats_add(&st, ref, frd(r, R), q16(ref)); } acc_row("hypot_fast8 (8-seg)", &st, "Shift-only, no multiply"); } @@ -2083,6 +2141,175 @@ static void section_accuracy_table(void) { printf("\n"); printf("\n"); + /* ── Test-only rows (not library functions — conversion & pipeline checks) ── */ + md_h3("14.0.1 Conversion & pipeline accuracy (test-only)"); + printf("| Function | Max err (%%) | Avg err (%%) | Note |\n"); + printf("|---|---:|---:|---|\n"); + + /* --- rad→BAM conversion (standalone: 65536-pt) --- */ + { + stats_t &st = st_rad2bam; + for (int i = 0; i < 65536; i++) { + double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0); + s32 rad_fp = (s32)(angle * scale); + u16 got = fr_rad_to_bam(rad_fp, 16); + /* Exact BAM: wrap to u16 */ + double exact_bam_d = angle * 65536.0 / (2.0 * M_PI); + s32 exact_bam_s = (s32)floor(exact_bam_d + 0.5); + u16 expected = (u16)(exact_bam_s & 0xFFFF); + /* Feed stats as degrees so the error is interpretable */ + double got_deg = got * (360.0 / 65536.0); + double exp_deg = expected * (360.0 / 65536.0); + stats_add(&st, angle, got_deg, exp_deg); + } + { + char note[128]; + snprintf(note, sizeof(note), + "fr_rad_to_bam() ±2π at r16; max %d BAM LSB", + (int)(st.max_abs_err / (360.0 / 65536.0) + 0.5)); + acc_row("rad→BAM conv", &st, note); + } + } + + /* --- deg→BAM conversion (standalone: 65536-pt) --- */ + { + stats_t &st = st_deg2bam; + for (int i = 0; i < 65536; i++) { + double deg = -360.0 + (720.0 * i / 65536.0); + s32 deg_fp = (s32)(deg * scale); + u16 got = 
fr_deg_to_bam(deg_fp, 16); + /* Exact BAM: wrap to u16 */ + double exact_bam_d = deg * 65536.0 / 360.0; + s32 exact_bam_s = (s32)floor(exact_bam_d + 0.5); + u16 expected = (u16)(exact_bam_s & 0xFFFF); + double got_deg = got * (360.0 / 65536.0); + double exp_deg = expected * (360.0 / 65536.0); + stats_add(&st, deg, got_deg, exp_deg); + } + { + char note[128]; + snprintf(note, sizeof(note), + "fr_deg_to_bam() ±360° at r16; max %d BAM LSB", + (int)(st.max_abs_err / (360.0 / 65536.0) + 0.5)); + acc_row("deg→BAM conv", &st, note); + } + } + + /* --- sin / cos via integer degrees ±360° --- */ + { + stats_t &st = st_sincos_deg_s32; + for (int deg = -360; deg <= 360; deg++) { + double rad = deg * M_PI / 180.0; + stats_add(&st, (double)deg, frd(fr_sin_deg(deg), FR_TRIG_OUT_PREC), q16(sin(rad))); + stats_add(&st, (double)deg, frd(fr_cos_deg(deg), FR_TRIG_OUT_PREC), q16(cos(rad))); + } + acc_row("sin/cos (int deg)", &st, "fr_sin_deg/fr_cos_deg ±360° integer degrees"); + } + + /* --- tan via integer degrees ±360° --- */ + { + stats_t &st = st_tan_deg_s32; + for (int deg = -360; deg <= 360; deg++) { + double rad = deg * M_PI / 180.0; + stats_add(&st, (double)deg, frd(FR_TanI((s16)deg), FR_TRIG_OUT_PREC), q16(tan_ref(rad))); + } + acc_row("tan (int deg)", &st, "FR_TanI ±360° full; sat at poles"); + } + + /* --- Conversion macro accuracy (all 6 direction macros) --- */ + + /* FR_RAD2BAM macro: test within safe range (±pi at r16) */ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 65536; i++) { + double angle = -M_PI + (2.0 * M_PI * i / 65536.0); + s32 rad_fp = (s32)(angle * scale); + s32 raw = FR_RAD2BAM(rad_fp); + u16 got = (u16)((raw + (1 << 15)) >> 16); + double exact_d = angle * 65536.0 / (2.0 * M_PI); + u16 expected = (u16)((s32)floor(exact_d + 0.5) & 0xFFFF); + double got_deg = got * (360.0 / 65536.0); + double exp_deg = expected * (360.0 / 65536.0); + stats_add(&st, angle, got_deg, exp_deg); + } + acc_row("FR_RAD2BAM macro", &st, "Shift-approx ±π at r16; overflows 
beyond ±4 rad"); + } + + /* FR_DEG2BAM macro: test within safe range (±180° at r7) */ + { + stats_t st; stats_reset(&st); + const u16 radix = 7; + for (int i = -23040; i <= 23040; i++) { /* ±180° at r7 = ±23040 */ + double deg = (double)i / (1 << radix); + s32 raw = FR_DEG2BAM((s32)i); + u16 got = (u16)((raw + (1 << (radix - 1))) >> radix); + double exact_d = deg * 65536.0 / 360.0; + u16 expected = (u16)((s32)floor(exact_d + 0.5) & 0xFFFF); + double got_deg = got * (360.0 / 65536.0); + double exp_deg = expected * (360.0 / 65536.0); + stats_add(&st, deg, got_deg, exp_deg); + } + acc_row("FR_DEG2BAM macro", &st, "Shift-approx ±180° at r7; overflows beyond ±256°"); + } + + /* FR_BAM2RAD macro: multiplies by 2π/65536 using shifts. + * BAM 0..32767 at r16 (upper half overflows s32 when <<16). */ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 32768; i++) { + s32 bam_r16 = (s32)i << 16; + s32 rad_fp = FR_BAM2RAD(bam_r16); + double got_rad = frd(rad_fp, 16); + double exp_rad = (double)i * 2.0 * M_PI / 65536.0; + stats_add(&st, (double)i, got_rad, exp_rad); + } + acc_row("FR_BAM2RAD macro", &st, "BAM→rad r16 full (0..32767; <<16 overflow above)"); + } + + /* FR_BAM2DEG macro: multiplies by 360/65536 using shifts. + * BAM 0..32767 at r16 (same s32 overflow limit). 
*/ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 32768; i++) { + s32 bam_r16 = (s32)i << 16; + s32 deg_fp = FR_BAM2DEG(bam_r16); + double got_deg = frd(deg_fp, 16); + double exp_deg = (double)i * 360.0 / 65536.0; + stats_add(&st, (double)i, got_deg, exp_deg); + } + acc_row("FR_BAM2DEG macro", &st, "BAM→deg r16 full (0..32767; <<16 overflow above)"); + } + + /* FR_DEG2RAD macro: 65536-pt ±360° at r16 full */ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 65536; i++) { + double deg = -360.0 + (720.0 * i / 65536.0); + s32 deg_fp = (s32)(deg * scale); + s32 rad_fp = FR_DEG2RAD(deg_fp); + double got_rad = frd(rad_fp, 16); + double exp_rad = deg * M_PI / 180.0; + stats_add(&st, deg, got_rad, exp_rad); + } + acc_row("FR_DEG2RAD macro", &st, "65536-pt ±360° r16 full"); + } + + /* FR_RAD2DEG macro: 65536-pt ±2π at r16 full */ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 65536; i++) { + double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0); + s32 rad_fp = (s32)(angle * scale); + s32 deg_fp = FR_RAD2DEG(rad_fp); + double got_deg = frd(deg_fp, 16); + double exp_deg = angle * 180.0 / M_PI; + stats_add(&st, angle, got_deg, exp_deg); + } + acc_row("FR_RAD2DEG macro", &st, "65536-pt ±2π r16 full"); + } + + printf("\n"); + /* Diagnostic: show where each trig function's worst % error occurs */ md_h3("14.1 Worst-case percent error diagnostics"); printf("Shows the input that produced the maximum %% error for each trig function.\n"); @@ -2092,10 +2319,14 @@ static void section_accuracy_table(void) { printf("|---|---|---:|---:|---:|---:|\n"); struct { const char *name; stats_t *s; } diag[] = { - {"sin / cos", &st_sincos}, - {"tan", &st_tan}, - {"asin/acos", &st_asincos}, - {"atan2", &st_atan2}, + {"sin / cos", &st_sincos}, + {"tan", &st_tan}, + {"rad→BAM conv", &st_rad2bam}, + {"deg→BAM conv", &st_deg2bam}, + {"sin/cos (int deg)",&st_sincos_deg_s32}, + {"tan (int deg)", &st_tan_deg_s32}, + {"asin/acos", &st_asincos}, + {"atan2", &st_atan2}, }; for 
(int d = 0; d < (int)(sizeof(diag)/sizeof(diag[0])); d++) { stats_t *s = diag[d].s; From 78fcf966609acef6b4f27b0f7fe9dd044be952ea Mon Sep 17 00:00:00 2001 From: deftio Date: Wed, 29 Apr 2026 20:50:35 -0700 Subject: [PATCH 2/7] updated tan bam --- src/FR_math.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/FR_math.c b/src/FR_math.c index dce131b..3a7a1d4 100644 --- a/src/FR_math.c +++ b/src/FR_math.c @@ -161,8 +161,17 @@ s32 fr_tan_bam(u16 bam) lo = (s32)gFR_TAN_TAB_O[idx]; hi = (s32)gFR_TAN_TAB_O[idx + 1]; raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); - /* raw is in u0.15. Shift to s15.16. */ - raw <<= 1; + + if (raw < 0x40) { + /* Near zero: redo interpolation with 4 extra bits of + * precision to reduce rounding error when result is small. */ + s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; + s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; + raw = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + raw = (raw + 4) >> 3; /* u0.19 → s15.16 with rounding */ + } else { + raw <<= 1; /* u0.15 → s15.16 */ + } } else { /* Second octant: tan(x) = 1 / tan(90° - x). * complement is in (0, 0x2000] = (0°, 45°]. */ @@ -175,13 +184,19 @@ s32 fr_tan_bam(u16 bam) hi = (s32)gFR_TAN_TAB_O[idx + 1]; raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); - /* raw is tan(complement) in u0.15. Compute 1/raw in s15.16. - * 1.0 in s15.16 = 0x10000. We want (1<<16) / (raw_in_0.15) - * = (1<<16) * (1<<15) / raw_raw = (1<<31) / raw. - * Use unsigned to avoid overflow: 0x80000000 / raw. */ - if (raw < 2) { - /* Near pole: saturate */ - raw = FR_TRIG_MAXVAL; + if (raw < 0x40) { + /* Near pole: redo interpolation with 4 extra bits of + * precision. The reciprocal amplifies small interpolation + * errors, so extra precision significantly helps here. + * Result: (2^31 / raw_hp) << 4 = 2^35 / raw_hp. 
*/ + s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; + s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; + s32 raw_hp = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + if (raw_hp < 32) { + raw = FR_TRIG_MAXVAL; + } else { + raw = (s32)((0x80000000u / (u32)raw_hp) << 4); + } } else { raw = (s32)(0x80000000u / (u32)raw); } From 1104f2eeeb163ee607331b2a7ce07f91e7bc55ac Mon Sep 17 00:00:00 2001 From: deftio Date: Sun, 3 May 2026 14:53:13 -0700 Subject: [PATCH 3/7] minor docs and accuracy tune up --- README.md | 64 ++- compare_lfm/comparison_results.json | 479 ++++++++++++++++ compare_lfm/comparison_summary.md | 42 +- docs/README.md | 69 +-- docs/api-reference.md | 48 +- docs/building.md | 2 +- docs/examples.md | 6 +- docs/fixed-point-primer.md | 6 +- keywords.txt | 9 +- makefile | 17 + pages/guide/api-reference.html | 45 +- pages/guide/building.html | 2 +- pages/guide/examples.html | 7 +- pages/guide/fixed-point-primer.html | 6 +- pages/index.html | 79 +-- scripts/accuracy_report.sh | 8 +- src/FR_math.c | 519 +++++++++++++----- src/FR_math.h | 123 ++--- src/FR_trig_table.h | 50 +- tests/test_tdd.cpp | 815 +++++++++++++++++++++------- tools/README.md | 131 +++++ tools/trig_neighborhood.cpp | 519 ++++++++++++++++++ 22 files changed, 2458 insertions(+), 588 deletions(-) create mode 100644 compare_lfm/comparison_results.json create mode 100644 tools/README.md create mode 100644 tools/trig_neighborhood.cpp diff --git a/README.md b/README.md index 357bd5b..784535c 100644 --- a/README.md +++ b/README.md @@ -27,33 +27,34 @@ beyond ``. Errors below are measured at Q16.16 (s15.16). All functions accept any radix — Q16.16 is just the reference point for the table. -Percent errors skip expected values near zero (|expected| < 0.01). At other radixes (3-bit, 24-bit, etc.) accuracy will differ due to the number of fractional bits available. All functions support radix 0 to 30. 
-| Function | Max err (%) | Avg err (%) | Note | -|---|---:|---:|---| -| sin/cos (BAM) | 0.1646 | 0.0058 | 65536 BAM; 129-entry quadrant table | -| sin/cos (deg) | 0.5909 | 0.0091 | 65536-pt deg r7 + specials | -| sin/cos (rad) | 0.1646 | 0.0059 | 65536-pt rad r16 | -| tan (BAM) | 0.1704 | 0.0065 | 65536 BAM; 65-entry octant table | -| tan (deg) | 0.6000 | 0.0140 | 65536-pt deg r7 + specials | -| tan (rad) | 0.1704 | 0.0065 | 65536-pt rad r16 | -| asin / acos | 1.9776 | 0.0308 | 65536-pt; sqrt approx near boundary | -| atan2 | 0.4953 | 0.0238 | 65536x5 radii; asin/acos+hypot_fast8 | -| atan | 0.2985 | 0.0153 | 20001-pt sweep [-10,10]; via FR_atan2 | -| sqrt | 0.0003 | 0.0000 | Round-to-nearest | -| log2 | 0.2479 | 0.0045 | 65-entry mantissa table | -| pow2 | 0.1373 | 0.0057 | 65-entry fraction table | -| ln, log10 | 0.0015 | 0.0004 | Via FR_MULK28 from log2 | -| exp | 0.0719 | 0.0051 | FR_MULK28 + FR_pow2 | -| exp_fast | 0.0719 | 0.0064 | Shift-only scaling | -| pow10 | 0.1163 | 0.0075 | FR_MULK28 + FR_pow2 | -| pow10_fast | 0.1163 | 0.0100 | Shift-only scaling | -| hypot (exact) | 0.0001 | 0.0000 | 64-bit intermediate | -| hypot_fast8 (8-seg) | 0.0977 | 0.0508 | Shift-only, no multiply | +| Function | Max err (%)*| Avg err (%) | Peak at | Note | +|---|---:|---:|---:|---| +| sin/cos (BAM) | 0.4578 | 0.0076 | 94 | fr_sin_bam/fr_cos_bam direct; 129-entry table | +| sin/cos (deg) | 0.4578 | 0.0076 | -359.5 | FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM | +| sin/cos (rad) | 0.6104 | 0.0085 | -4.721 | fr_sin/fr_cos via fr_rad_to_bam ±2π r16 | +| tan (BAM) | 0.5823 | 0.0008 | 16360 | fr_tan_bam 65536-pt full; ±maxint at poles | +| tan (deg) | 0.5311 | 0.0008 | -270.1 | FR_Tan ±360° s15.16 full; sat at poles | +| tan (rad) | 13.4069 | 0.0029 | -4.713 | fr_tan ±2π r16 full; sat at poles | +| asin / acos | 0.8743 | 0.0301 | 0.0123 | 65536-pt; sqrt approx near boundary | +| atan2 | 0.5100 | 0.0237 | -2.571 | 65536x5 radii; asin/acos+hypot_fast8 | +| atan | 0.3390 | 0.0154 | -0.018 
| 20001-pt full sweep [-10,10]; via FR_atan2 | +| sqrt | 0.0239 | 0.0000 | 0.0001 | Round-to-nearest | +| log2 | 0.0286 | 0.0029 | 0.895 | 65-entry mantissa table | +| pow2 | 0.0019 | 0.0003 | 3.36 | 65-entry fraction table | +| ln, log10 | 0.0004 | 0.0000 | 50 | Via FR_MULK28 from log2 | +| exp | 0.0003 | 0.0000 | 3.91 | FR_MULK28 + FR_pow2 | +| exp_fast | 0.0009 | 0.0001 | 3.92 | Shift-only scaling | +| pow10 | 0.0007 | 0.0000 | 1.97 | FR_MULK28 + FR_pow2 | +| pow10_fast | 0.0028 | 0.0002 | 1.99 | Shift-only scaling | +| hypot (exact) | 0.0000 | 0.0000 | 0 | 64-bit intermediate | +| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | 1000 | Shift-only, no multiply | + +*Relative error; reference clamped to 1% of full-scale output. "Peak at" = input that produced max error. ### What's in the box @@ -62,8 +63,8 @@ number of fractional bits available. All functions support radix 0 to 30. |---|---| | Arithmetic | `FR_ADD`, `FR_SUB`, `FR_DIV`, `FR_DIV32`, `FR_MOD`, `FR_FixMuls`, `FR_FixMulSat`, `FR_CHRDX` | | Utility | `FR_MIN`, `FR_MAX`, `FR_CLAMP`, `FR_ABS`, `FR_SGN` | -| Trig (integer deg) | `FR_Sin`, `FR_Cos`, `FR_Tan`, `FR_SinI`, `FR_CosI`, `FR_TanI` | -| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam`, `fr_sin_deg`, `fr_cos_deg` | +| Trig (degree) | `fr_sin_deg`, `fr_cos_deg`, `fr_tan_deg`, `FR_SinI`, `FR_CosI`, `FR_TanI` | +| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam` | | Inverse trig | `FR_atan`, `FR_atan2`, `FR_asin`, `FR_acos` | | Log / exp | `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`, `FR_EXP_FAST`, `FR_POW10_FAST`, `FR_MULK28` | | Roots | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` | @@ -166,18 +167,23 @@ s32 two = I2FR(2, R); /* 2.0 → raw 131072 */ * * MixedCase FR_ names are functions — they contain loops, tables, or * multi-step algorithms where inlining would waste ROM: - * FR_Cos, FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ... 
+ * FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ... * - * lowercase fr_ names are v2 functions (radian trig, wave generators, - * ADSR envelopes): - * fr_sin, fr_cos, fr_tan, fr_wave_tri, fr_adsr_step ... + * lowercase fr_ names are v2 functions (degree/radian/BAM trig, wave + * generators, ADSR envelopes): + * fr_sin_deg, fr_cos_deg, fr_tan_deg, fr_sin, fr_cos, fr_tan, + * fr_wave_tri, fr_adsr_step ... + * + * Legacy aliases: FR_Cos, FR_Sin, FR_Tan still work — they are + * macros that map to fr_cos_deg, fr_sin_deg, fr_tan_deg. New code + * should use the fr_ names directly. * * Some macros wrap functions: FR_EXP(x,r) scales x then calls * FR_pow2 — one-liner convenience, heavy lifting in the function. */ /* ---- Math functions ---- */ -s32 c45 = FR_Cos(45, 0); /* cos(45°) = 0.7071 */ +s32 c45 = fr_cos_deg(45, 0); /* cos(45°) = 0.7071 */ s32 s30 = fr_sin(FR_numstr("0.5236", R), R); /* sin(0.5236 rad) */ s32 root2 = FR_sqrt(two, R); /* sqrt(2) = 1.4142 */ s32 angle = FR_atan2(I2FR(1,R), I2FR(1,R), R); /* atan2(1,1) rad */ diff --git a/compare_lfm/comparison_results.json b/compare_lfm/comparison_results.json new file mode 100644 index 0000000..b9c1b3b --- /dev/null +++ b/compare_lfm/comparison_results.json @@ -0,0 +1,479 @@ +{ + "description": "FR_math vs libfixmath benchmark — both measured against math.h double precision (IEEE 754)", + "gold_standard": " IEEE 754 double precision (~15 significant digits)", + "fixed_point_format": "Q16.16 (s15.16), 1 LSB = 1.52587890625000e-05", + "accuracy_points": 65536, + "timing_iterations": 100000, + "rel_error_threshold": 0.01, + "platform": "macOS ARM (Apple Silicon)", + "optimization": "-O2", + "results": [ + { + "function": "sin", + "double_reference": "std::sin", + "sweep": "65536-pt, [-pi, +pi]", + "speed": { + "fr_math_ns_per_call": 5.6, + "libfixmath_ns_per_call": 10.6, + "fr_math_speedup": 1.91, + "faster": "fr_math" + }, + "accuracy_vs_double": { + "fr_math": { + "max_abs_error": 7.40851348e-05, + 
"mean_abs_error": 1.88938357e-05, + "max_error_lsb": 4.9, + "mean_error_lsb": 1.2, + "max_rel_error_pct": 0.4816, + "mean_rel_error_pct": 0.0081 + }, + "libfixmath": { + "max_abs_error": 7.74511497e-03, + "mean_abs_error": 5.34549003e-04, + "max_error_lsb": 507.6, + "mean_error_lsb": 35.0, + "max_rel_error_pct": 74.5513, + "mean_rel_error_pct": 0.6105 + }, + "closer_to_double": "fr_math" + } + }, + { + "function": "cos", + "double_reference": "std::cos", + "sweep": "65536-pt, [-pi, +pi]", + "speed": { + "fr_math_ns_per_call": 8.9, + "libfixmath_ns_per_call": 13.3, + "fr_math_speedup": 1.50, + "faster": "fr_math" + }, + "accuracy_vs_double": { + "fr_math": { + "max_abs_error": 6.76591807e-05, + "mean_abs_error": 2.03740409e-05, + "max_error_lsb": 4.4, + "mean_error_lsb": 1.3, + "max_rel_error_pct": 0.3282, + "mean_rel_error_pct": 0.0077 + }, + "libfixmath": { + "max_abs_error": 7.75591931e-03, + "mean_abs_error": 5.36939114e-04, + "max_error_lsb": 508.3, + "mean_error_lsb": 35.2, + "max_rel_error_pct": 74.4001, + "mean_rel_error_pct": 0.6121 + }, + "closer_to_double": "fr_math" + } + }, + { + "function": "tan", + "double_reference": "std::tan", + "sweep": "65536-pt, [-1.2, 1.2] rad", + "speed": { + "fr_math_ns_per_call": 7.1, + "libfixmath_ns_per_call": 32.6, + "fr_math_speedup": 4.57, + "faster": "fr_math" + }, + "accuracy_vs_double": { + "fr_math": { + "max_abs_error": 1.98158306e-04, + "mean_abs_error": 3.37019908e-05, + "max_error_lsb": 13.0, + "mean_error_lsb": 2.2, + "max_rel_error_pct": 0.1551, + "mean_rel_error_pct": 0.0055 + }, + "libfixmath": { + "max_abs_error": 1.82495961e-02, + "mean_abs_error": 8.01092905e-04, + "max_error_lsb": 1196.0, + "mean_error_lsb": 52.5, + "max_rel_error_pct": 0.7099, + "mean_rel_error_pct": 0.0410 + }, + "closer_to_double": "fr_math" + }, + "note": "Skip near pi/2" + }, + { + "function": "asin", + "double_reference": "std::asin", + "sweep": "65536-pt, [-0.999, 0.999]", + "speed": { + "fr_math_ns_per_call": 9.7, + 
"libfixmath_ns_per_call": 49.5, + "fr_math_speedup": 5.11, + "faster": "fr_math" + }, + "accuracy_vs_double": { + "fr_math": { + "max_abs_error": 3.79872462e-04, + "mean_abs_error": 1.04994412e-04, + "max_error_lsb": 24.9, + "mean_error_lsb": 6.9, + "max_rel_error_pct": 1.9776, + "mean_rel_error_pct": 0.0477 + }, + "libfixmath": { + "max_abs_error": 1.01788963e-02, + "mean_abs_error": 3.64421558e-03, + "max_error_lsb": 667.1, + "mean_error_lsb": 238.8, + "max_rel_error_pct": 20.1233, + "mean_rel_error_pct": 2.4452 + }, + "closer_to_double": "fr_math" + } + }, + { + "function": "acos", + "double_reference": "std::acos", + "sweep": "65536-pt, [-0.999, 0.999]", + "speed": { + "fr_math_ns_per_call": 8.4, + "libfixmath_ns_per_call": 50.7, + "fr_math_speedup": 6.03, + "faster": "fr_math" + }, + "accuracy_vs_double": { + "fr_math": { + "max_abs_error": 3.75418007e-04, + "mean_abs_error": 1.00708880e-04, + "max_error_lsb": 24.6, + "mean_error_lsb": 6.6, + "max_rel_error_pct": 0.2724, + "mean_rel_error_pct": 0.0093 + }, + "libfixmath": { + "max_abs_error": 1.01897006e-02, + "mean_abs_error": 3.64422377e-03, + "max_error_lsb": 667.8, + "mean_error_lsb": 238.8, + "max_rel_error_pct": 15.3142, + "mean_rel_error_pct": 0.3475 + }, + "closer_to_double": "fr_math" + } + }, + { + "function": "atan", + "double_reference": "std::atan", + "sweep": "65536-pt, [-50, 50]", + "speed": { + "fr_math_ns_per_call": 8.1, + "libfixmath_ns_per_call": 11.0, + "fr_math_speedup": 1.37, + "faster": "fr_math" + }, + "accuracy_vs_double": { + "fr_math": { + "max_abs_error": 9.14677954e-04, + "mean_abs_error": 7.43583969e-05, + "max_error_lsb": 59.9, + "mean_error_lsb": 4.9, + "max_rel_error_pct": 0.2149, + "mean_rel_error_pct": 0.0061 + }, + "libfixmath": { + "max_abs_error": 1.01676134e-02, + "mean_abs_error": 6.15802358e-03, + "max_error_lsb": 666.3, + "mean_error_lsb": 403.6, + "max_rel_error_pct": 19.8632, + "mean_rel_error_pct": 0.4571 + }, + "closer_to_double": "fr_math" + } + }, + { + 
"function": "atan2", + "double_reference": "std::atan2", + "sweep": "65536-pt, 5 radii x 360 deg", + "speed": { + "fr_math_ns_per_call": 15.9, + "libfixmath_ns_per_call": 10.9, + "fr_math_speedup": 0.69, + "faster": "libfixmath" + }, + "accuracy_vs_double": { + "fr_math": { + "max_abs_error": 9.53437855e-04, + "mean_abs_error": 1.91371871e-04, + "max_error_lsb": 62.5, + "mean_error_lsb": 12.5, + "max_rel_error_pct": 0.4122, + "mean_rel_error_pct": 0.0239 + }, + "libfixmath": { + "max_abs_error": 1.01728729e-02, + "mean_abs_error": 3.88005371e-03, + "max_error_lsb": 666.7, + "mean_error_lsb": 254.3, + "max_rel_error_pct": 20.0045, + "mean_rel_error_pct": 0.9267 + }, + "closer_to_double": "fr_math" + }, + "note": "All 4 quadrants" + }, + { + "function": "sqrt", + "double_reference": "std::sqrt", + "sweep": "65536-pt, [0.01, 100]", + "speed": { + "fr_math_ns_per_call": 18.6, + "libfixmath_ns_per_call": 19.9, + "fr_math_speedup": 1.07, + "faster": "fr_math" + }, + "accuracy_vs_double": { + "fr_math": { + "max_abs_error": 7.62924903e-06, + "mean_abs_error": 3.80582266e-06, + "max_error_lsb": 0.5, + "mean_error_lsb": 0.2, + "max_rel_error_pct": 0.0062, + "mean_rel_error_pct": 0.0001 + }, + "libfixmath": { + "max_abs_error": 7.62924903e-06, + "mean_abs_error": 3.80582266e-06, + "max_error_lsb": 0.5, + "mean_error_lsb": 0.2, + "max_rel_error_pct": 0.0062, + "mean_rel_error_pct": 0.0001 + }, + "closer_to_double": "tie" + } + }, + { + "function": "exp", + "double_reference": "std::exp", + "sweep": "65536-pt, [-5, 5]", + "speed": { + "fr_math_ns_per_call": 3.0, + "libfixmath_ns_per_call": 64.7, + "fr_math_speedup": 21.28, + "faster": "fr_math" + }, + "accuracy_vs_double": { + "fr_math": { + "max_abs_error": 3.17909587e-03, + "mean_abs_error": 1.03218909e-04, + "max_error_lsb": 208.3, + "mean_error_lsb": 6.8, + "max_rel_error_pct": 0.1486, + "mean_rel_error_pct": 0.0078 + }, + "libfixmath": { + "max_abs_error": 3.30095957e-03, + "mean_abs_error": 9.38398029e-05, + 
"max_error_lsb": 216.3, + "mean_error_lsb": 6.1, + "max_rel_error_pct": 0.0756, + "mean_rel_error_pct": 0.0042 + }, + "closer_to_double": "fr_math" + } + }, + { + "function": "ln", + "double_reference": "std::log", + "sweep": "65536-pt, [0.01, 100]", + "speed": { + "fr_math_ns_per_call": 9.0, + "libfixmath_ns_per_call": 453.2, + "fr_math_speedup": 50.53, + "faster": "fr_math" + }, + "accuracy_vs_double": { + "fr_math": { + "max_abs_error": 4.93278555e-05, + "mean_abs_error": 1.61117669e-05, + "max_error_lsb": 3.2, + "mean_error_lsb": 1.1, + "max_rel_error_pct": 0.3012, + "mean_rel_error_pct": 0.0006 + }, + "libfixmath": { + "max_abs_error": 3.40447818e-05, + "mean_abs_error": 5.14211182e-06, + "max_error_lsb": 2.2, + "mean_error_lsb": 0.3, + "max_rel_error_pct": 0.0557, + "mean_rel_error_pct": 0.0002 + }, + "closer_to_double": "libfixmath" + } + }, + { + "function": "log2", + "double_reference": "std::log2", + "sweep": "65536-pt, [0.01, 100]", + "speed": { + "fr_math_ns_per_call": 8.5, + "libfixmath_ns_per_call": 39.4, + "fr_math_speedup": 4.63, + "faster": "fr_math" + }, + "accuracy_vs_double": { + "fr_math": { + "max_abs_error": 6.06739329e-05, + "mean_abs_error": 2.30368713e-05, + "max_error_lsb": 4.0, + "mean_error_lsb": 1.5, + "max_rel_error_pct": 0.4945, + "mean_rel_error_pct": 0.0006 + }, + "libfixmath": { + "max_abs_error": 3.56826644e-05, + "mean_abs_error": 9.96190621e-06, + "max_error_lsb": 2.3, + "mean_error_lsb": 0.7, + "max_rel_error_pct": 0.1758, + "mean_rel_error_pct": 0.0002 + }, + "closer_to_double": "libfixmath" + } + }, + { + "function": "mul", + "double_reference": "double a*b", + "sweep": "65536-pt, a in [-50,50], b in [-2,2]", + "speed": { + "fr_math_ns_per_call": 0.9, + "libfixmath_ns_per_call": 1.2, + "fr_math_speedup": 1.33, + "faster": "fr_math" + }, + "accuracy_vs_double": { + "fr_math": { + "max_abs_error": 7.62939453e-06, + "mean_abs_error": 3.81535541e-06, + "max_error_lsb": 0.5, + "mean_error_lsb": 0.3, + "max_rel_error_pct": 0.0692, 
+ "mean_rel_error_pct": 0.0004 + }, + "libfixmath": { + "max_abs_error": 7.62939453e-06, + "mean_abs_error": 3.81535541e-06, + "max_error_lsb": 0.5, + "mean_error_lsb": 0.3, + "max_rel_error_pct": 0.0692, + "mean_rel_error_pct": 0.0004 + }, + "closer_to_double": "tie" + } + }, + { + "function": "div", + "double_reference": "double a/b", + "sweep": "65536-pt, a/b in [-50,50]/[0.5,50]", + "speed": { + "fr_math_ns_per_call": 0.9, + "libfixmath_ns_per_call": 5.3, + "fr_math_speedup": 6.10, + "faster": "fr_math" + }, + "accuracy_vs_double": { + "fr_math": { + "max_abs_error": 7.62927377e-06, + "mean_abs_error": 3.82182808e-06, + "max_error_lsb": 0.5, + "mean_error_lsb": 0.3, + "max_rel_error_pct": 0.0727, + "mean_rel_error_pct": 0.0010 + }, + "libfixmath": { + "max_abs_error": 8.37162948e-06, + "mean_abs_error": 3.82625614e-06, + "max_error_lsb": 0.5, + "mean_error_lsb": 0.3, + "max_rel_error_pct": 0.0727, + "mean_rel_error_pct": 0.0010 + }, + "closer_to_double": "fr_math" + }, + "note": "Both use 64-bit intermediate" + }, + { + "function": "hypot", + "double_reference": "std::hypot", + "sweep": "65536-pt, 5 radii x 360 deg", + "speed": { + "fr_math_ns_per_call": 19.9 + }, + "accuracy_vs_double": { + "fr_math": { + "max_abs_error": 7.62930188e-06, + "mean_abs_error": 3.67171926e-06, + "max_error_lsb": 0.5, + "mean_error_lsb": 0.2, + "max_rel_error_pct": 0.0076, + "mean_rel_error_pct": 0.0009 + } + }, + "note": "FR_math only (libfixmath has no hypot)" + }, + { + "function": "hypot_fast8", + "double_reference": "std::hypot", + "sweep": "65536-pt, 5 radii x 360 deg", + "speed": { + "fr_math_ns_per_call": 2.6 + }, + "accuracy_vs_double": { + "fr_math": { + "max_abs_error": 1.37244198e+00, + "mean_abs_error": 1.13634634e-01, + "max_error_lsb": 89944.4, + "mean_error_lsb": 7447.2, + "max_rel_error_pct": 0.1372, + "mean_rel_error_pct": 0.0516 + } + }, + "note": "FR_math only; shift-only, no multiply" + } + ], + "summary": { + "head_to_head_functions": 13, + "faster_wins": { 
"fr_math": 12, "libfixmath": 1 }, + "accuracy_wins": { "fr_math": 9, "libfixmath": 2, "tie": 2 }, + "total_functions_tested": 15 + }, + "notes": [ + "All accuracy measured vs IEEE 754 double. Lower = closer to perfect.", + "LSB = Q16.16 least-significant-bit = 1.53e-5. Best possible = 0.5 LSB.", + "Percent errors skip |ref| < 0.01 to avoid near-zero division spikes.", + "Both libraries use Q16.16 (s15.16): 1.0 = 65536.", + "FR_math trig: BAM + 129-entry LUT + linear interpolation.", + "libfixmath trig: parabolic approximation + 5th-order correction.", + "Timing: min of 3 passes x 100000 calls; cache-warm.", + "Speedup > 1.0 means FR_math is faster by that factor." + ], + "compiled_size_note": "Run 'make size' in .compare/ for live numbers. The values below are representative.", + "compiled_size": { + "compiler": "clang -O2 (macOS ARM)", + "fr_math": { + "files": "FR_math.c (single file)", + "functions": "trig(6), inv-trig(4), log/ln/log10, exp/pow2/pow10, exp_fast/pow10_fast, sqrt, hypot(2), waves(6), ADSR(4), print(4), format", + "rom_bytes": 7470, + "ram_bss_bytes": 0, + "note": "All tables in const ROM. Zero runtime allocation." + }, + "libfixmath": { + "files": "fix16.c, fix16_sqrt.c, fix16_exp.c, fix16_trig.c, fix16_str.c, uint32.c, fract32.c", + "functions": "trig(6), inv-trig(4), log/log2, exp, sqrt, mul/div, str", + "rom_bytes": 4912, + "ram_bss_bytes": 114688, + "rom_bytes_no_cache": 5476, + "ram_bss_bytes_no_cache": 0, + "note": "Default mode caches 112 KB of sin/exp LUTs in BSS. FIXMATH_NO_CACHE eliminates RAM but recomputes per call." + } + } +} diff --git a/compare_lfm/comparison_summary.md b/compare_lfm/comparison_summary.md index 9169c50..e547ce4 100644 --- a/compare_lfm/comparison_summary.md +++ b/compare_lfm/comparison_summary.md @@ -23,13 +23,13 @@ All errors measured vs IEEE 754 double. Pct errors skip |ref| < 0.01. 
| Function | FR max LSB | FR max %% | FR avg %% | lfm max LSB | lfm max %% | lfm avg %% | Winner | |----------|----------:|---------:|---------:|----------:|---------:|---------:|--------| -| sin | 8.8 | 1.0615 | 0.0158 | 507.6 | 74.5513 | 0.6105 | FR | -| cos | 8.2 | 0.9018 | 0.0161 | 508.3 | 74.4001 | 0.6121 | FR | -| tan | 55.7 | 1.0080 | 0.0228 | 1196.0 | 0.7099 | 0.0410 | FR | -| asin | 31.3 | 0.5795 | 0.0134 | 667.1 | 20.1233 | 2.4452 | FR | -| acos | 31.0 | 0.5194 | 0.0056 | 667.8 | 15.3142 | 0.3475 | FR | -| atan | 62.7 | 0.2149 | 0.0061 | 666.3 | 19.8632 | 0.4571 | FR | -| atan2 | 63.6 | 0.4122 | 0.0258 | 666.7 | 20.0045 | 0.9267 | FR | +| sin | 4.9 | 0.4816 | 0.0081 | 507.6 | 74.5513 | 0.6105 | FR | +| cos | 4.4 | 0.3282 | 0.0077 | 508.3 | 74.4001 | 0.6121 | FR | +| tan | 13.0 | 0.1551 | 0.0055 | 1196.0 | 0.7099 | 0.0410 | FR | +| asin | 24.9 | 1.9776 | 0.0477 | 667.1 | 20.1233 | 2.4452 | FR | +| acos | 24.6 | 0.2724 | 0.0093 | 667.8 | 15.3142 | 0.3475 | FR | +| atan | 59.9 | 0.2149 | 0.0061 | 666.3 | 19.8632 | 0.4571 | FR | +| atan2 | 62.5 | 0.4122 | 0.0239 | 666.7 | 20.0045 | 0.9267 | FR | | sqrt | 0.5 | 0.0062 | 0.0001 | 0.5 | 0.0062 | 0.0001 | tie | | exp | 208.3 | 0.1486 | 0.0078 | 216.3 | 0.0756 | 0.0042 | FR | | ln | 3.2 | 0.3012 | 0.0006 | 2.2 | 0.0557 | 0.0002 | lfm | @@ -43,21 +43,21 @@ All errors measured vs IEEE 754 double. Pct errors skip |ref| < 0.01. 
| Function | FR_math | libfixmath | Speedup | Faster | |----------|--------:|-----------:|--------:|--------| -| sin | 2.6 | 20.7 | 7.94x | FR | -| cos | 4.8 | 18.4 | 3.86x | FR | -| tan | 6.0 | 41.4 | 6.89x | FR | -| asin | 11.5 | 53.7 | 4.67x | FR | -| acos | 8.4 | 50.4 | 5.97x | FR | -| atan | 8.0 | 11.2 | 1.41x | FR | -| atan2 | 15.9 | 10.5 | 0.66x | lfm | -| sqrt | 18.6 | 19.8 | 1.06x | FR | -| exp | 3.1 | 67.6 | 22.02x | FR | -| ln | 8.8 | 479.3 | 54.70x | FR | -| log2 | 8.7 | 39.4 | 4.55x | FR | +| sin | 5.6 | 10.6 | 1.91x | FR | +| cos | 8.9 | 13.3 | 1.50x | FR | +| tan | 7.1 | 32.6 | 4.57x | FR | +| asin | 9.7 | 49.5 | 5.11x | FR | +| acos | 8.4 | 50.7 | 6.03x | FR | +| atan | 8.1 | 11.0 | 1.37x | FR | +| atan2 | 15.9 | 10.9 | 0.69x | lfm | +| sqrt | 18.6 | 19.9 | 1.07x | FR | +| exp | 3.0 | 64.7 | 21.28x | FR | +| ln | 9.0 | 453.2 | 50.53x | FR | +| log2 | 8.5 | 39.4 | 4.63x | FR | | mul | 0.9 | 1.2 | 1.33x | FR | -| div | 0.9 | 5.2 | 5.98x | FR | -| hypot | 20.0 | --- | --- | FR only | -| hypot_fast8 | 2.4 | --- | --- | FR only | +| div | 0.9 | 5.3 | 6.10x | FR | +| hypot | 19.9 | --- | --- | FR only | +| hypot_fast8 | 2.6 | --- | --- | FR only | ### Summary (13 head-to-head functions) diff --git a/docs/README.md b/docs/README.md index c88b451..dad53da 100644 --- a/docs/README.md +++ b/docs/README.md @@ -43,30 +43,32 @@ or any tooling. If you want the browser version, look in Errors below are measured at Q16.16 (s15.16). All functions accept any radix — Q16.16 is just the reference point for the table. See the [TDD report](../build/test_tdd_report.md) for sweeps at radixes 8, 12, -16, and 24. Percent errors skip expected values near zero (|expected| < 0.01). 
- - -| Function | Max err (%) | Avg err (%) | Note | -|---|---:|---:|---| -| sin/cos (BAM) | 0.1646 | 0.0058 | 65536 BAM; 129-entry quadrant table | -| sin/cos (deg) | 0.5909 | 0.0091 | 65536-pt deg r7 + specials | -| sin/cos (rad) | 0.1646 | 0.0059 | 65536-pt rad r16 | -| tan (BAM) | 0.1704 | 0.0065 | 65536 BAM; 65-entry octant table | -| tan (deg) | 0.6000 | 0.0140 | 65536-pt deg r7 + specials | -| tan (rad) | 0.1704 | 0.0065 | 65536-pt rad r16 | -| asin / acos | 1.9776 | 0.0308 | 65536-pt; sqrt approx near boundary | -| atan2 | 0.4953 | 0.0238 | 65536x5 radii; asin/acos+hypot_fast8 | -| atan | 0.2985 | 0.0153 | 20001-pt sweep [-10,10]; via FR_atan2 | -| sqrt | 0.0003 | 0.0000 | Round-to-nearest | -| log2 | 0.2479 | 0.0045 | 65-entry mantissa table | -| pow2 | 0.1373 | 0.0057 | 65-entry fraction table | -| ln, log10 | 0.0015 | 0.0004 | Via FR_MULK28 from log2 | -| exp | 0.0719 | 0.0051 | FR_MULK28 + FR_pow2 | -| exp_fast | 0.0719 | 0.0064 | Shift-only scaling | -| pow10 | 0.1163 | 0.0075 | FR_MULK28 + FR_pow2 | -| pow10_fast | 0.1163 | 0.0100 | Shift-only scaling | -| hypot (exact) | 0.0001 | 0.0000 | 64-bit intermediate | -| hypot_fast8 (8-seg) | 0.0977 | 0.0508 | Shift-only, no multiply | +16, and 24. 
+ + +| Function | Max err (%)*| Avg err (%) | Note | +|---|---:|---:|---| +| sin/cos (BAM) | 0.4578 | 0.0076 | fr_sin_bam/fr_cos_bam direct; 129-entry table | +| sin/cos (deg) | 0.4578 | 0.0076 | FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM | +| sin/cos (rad) | 0.6104 | 0.0085 | fr_sin/fr_cos via fr_rad_to_bam ±2π r16 | +| tan (BAM) | 0.5823 | 0.0008 | fr_tan_bam 65536-pt full; ±maxint at poles | +| tan (deg) | 0.5311 | 0.0008 | FR_Tan ±360° s15.16 full; sat at poles | +| tan (rad) | 13.4069 | 0.0029 | fr_tan ±2π r16 full; sat at poles | +| asin / acos | 0.8743 | 0.0301 | 65536-pt; sqrt approx near boundary | +| atan2 | 0.5100 | 0.0237 | 65536x5 radii; asin/acos+hypot_fast8 | +| atan | 0.3390 | 0.0154 | 20001-pt full sweep [-10,10]; via FR_atan2 | +| sqrt | 0.0239 | 0.0000 | Round-to-nearest | +| log2 | 0.0286 | 0.0029 | 65-entry mantissa table | +| pow2 | 0.0019 | 0.0003 | 65-entry fraction table | +| ln, log10 | 0.0004 | 0.0000 | Via FR_MULK28 from log2 | +| exp | 0.0003 | 0.0000 | FR_MULK28 + FR_pow2 | +| exp_fast | 0.0009 | 0.0001 | Shift-only scaling | +| pow10 | 0.0007 | 0.0000 | FR_MULK28 + FR_pow2 | +| pow10_fast | 0.0028 | 0.0002 | Shift-only scaling | +| hypot (exact) | 0.0000 | 0.0000 | 64-bit intermediate | +| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply | + +*Relative error; reference clamped to 1% of full-scale output. ## What's in the box @@ -75,8 +77,8 @@ radix — Q16.16 is just the reference point for the table. 
See the | --- | --- | | Arithmetic | `FR_ADD`, `FR_SUB`, `FR_DIV`, `FR_DIV32`, `FR_MOD`, `FR_FixMuls`, `FR_FixMulSat`, `FR_CHRDX` | | Utility | `FR_MIN`, `FR_MAX`, `FR_CLAMP`, `FR_ABS`, `FR_SGN` | -| Trig (integer deg) | `FR_Sin`, `FR_Cos`, `FR_Tan`, `FR_SinI`, `FR_CosI`, `FR_TanI` | -| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam`, `fr_sin_deg`, `fr_cos_deg` | +| Trig (degree) | `fr_sin_deg`, `fr_cos_deg`, `fr_tan_deg`, `FR_SinI`, `FR_CosI`, `FR_TanI` | +| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam` | | Inverse trig | `FR_atan`, `FR_atan2`, `FR_asin`, `FR_acos` | | Log / exp | `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`, `FR_EXP_FAST`, `FR_POW10_FAST`, `FR_MULK28` | | Roots | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` | @@ -167,18 +169,23 @@ s32 two = I2FR(2, R); /* 2.0 → raw 131072 */ * * MixedCase FR_ names are functions — they contain loops, tables, or * multi-step algorithms where inlining would waste ROM: - * FR_Cos, FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ... + * FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ... * - * lowercase fr_ names are v2 functions (radian trig, wave generators, - * ADSR envelopes): - * fr_sin, fr_cos, fr_tan, fr_wave_tri, fr_adsr_step ... + * lowercase fr_ names are v2 functions (degree/radian/BAM trig, wave + * generators, ADSR envelopes): + * fr_sin_deg, fr_cos_deg, fr_tan_deg, fr_sin, fr_cos, fr_tan, + * fr_wave_tri, fr_adsr_step ... + * + * Legacy aliases: FR_Cos, FR_Sin, FR_Tan still work — they are + * macros that map to fr_cos_deg, fr_sin_deg, fr_tan_deg. New code + * should use the fr_ names directly. * * Some macros wrap functions: FR_EXP(x,r) scales x then calls * FR_pow2 — one-liner convenience, heavy lifting in the function. 
*/ /* ---- Math functions ---- */ -s32 c45 = FR_Cos(45, 0); /* cos(45°) = 0.7071 */ +s32 c45 = fr_cos_deg(45, 0); /* cos(45°) = 0.7071 */ s32 s30 = fr_sin(FR_numstr("0.5236", R), R); /* sin(0.5236 rad) */ s32 root2 = FR_sqrt(two, R); /* sqrt(2) = 1.4142 */ s32 angle = FR_atan2(I2FR(1,R), I2FR(1,R), R); /* atan2(1,1) rad */ diff --git a/docs/api-reference.md b/docs/api-reference.md index 1ec3742..79a1cd9 100644 --- a/docs/api-reference.md +++ b/docs/api-reference.md @@ -291,7 +291,7 @@ bits = 16. Going wider would only add noise, not precision. "But what if I want to pass in any signed angle without worrying about conversion?" That is exactly what `FR_CosI(deg)`, -`FR_Cos(deg, radix)`, and `fr_cos(rad, radix)` are for. All three +`fr_cos_deg(deg, radix)`, and `fr_cos(rad, radix)` are for. All three take *signed* inputs and reduce them to BAM for you. The only place you actually see a `u16` is at the internal `fr_cos_bam` / `fr_sin_bam` boundary, which you only call by hand if you *want* @@ -405,35 +405,35 @@ represents exactly 1.0 in the s15.16 output format. | `fr_sin` | `s32 fr_sin(s32 rad, u16 radix)` | Same convention. | | `fr_tan` | `s32 fr_tan(s32 rad, u16 radix)` | Returns at **radix 16** (`FR_TRIG_OUT_PREC`). Computed as `(sin << 16) / cos`; saturates to `±INT32_MAX` (`FR_TRIG_MAXVAL`) near π/2 + kπ where cos → 0. | -### Integer-degree wrappers (legacy API) +### Degree wrappers (current and legacy) -The uppercase legacy API takes an angle in degrees. -`FR_SinI`, `FR_CosI` and `FR_TanI` -take plain integer degrees — the trailing *I* denotes -*integer*. The variants *without* the `I` -suffix (`FR_Sin`, `FR_Cos`, `FR_Tan`) -accept a `radix` argument and treat the degree value as -*fixed-point*, so you can pass fractional degrees like -42.375°. +The primary degree-based API uses lowercase `fr_` names. 
+These are functions (not macros) that take a degree value as +fixed-point at a caller-chosen radix: -| Symbol | Signature | Kind | +| Function | Signature | Notes | | --- | --- | --- | -| `FR_SinI` | `FR_SinI(deg)` → `s32` (s15.16) | Macro: `fr_sin_bam(FR_DEG2BAM(deg))`. Zero-cost inline. | -| `FR_CosI` | `FR_CosI(deg)` → `s32` (s15.16) | Macro: `fr_cos_bam(FR_DEG2BAM(deg))`. | -| `FR_TanI` | `s32 FR_TanI(s16 deg)` | Function. Returns at radix 16; saturates to `±INT32_MAX` near 90° / 270°. | -| `FR_Sin` | `s32 FR_Sin(s16 deg, u16 radix)` | `deg` is fixed-point at `radix`. Returns s15.16. | -| `FR_Cos` | `s32 FR_Cos(s16 deg, u16 radix)` | Same. | -| `FR_Tan` | `s32 FR_Tan(s16 deg, u16 radix)` | Returns at radix 16; saturates to `±INT32_MAX` near 90° / 270°. | +| `fr_sin_deg` | `s32 fr_sin_deg(s32 deg, u16 radix)` | `deg` is fixed-point degrees at `radix`. Returns s15.16. | +| `fr_cos_deg` | `s32 fr_cos_deg(s32 deg, u16 radix)` | Same. | +| `fr_tan_deg` | `s32 fr_tan_deg(s32 deg, u16 radix)` | Returns at radix 16; saturates to `±INT32_MAX` near 90° / 270°. | -### Degree wrappers on the BAM path +Pass `radix = 0` for plain integer degrees, or a higher radix +for fractional degrees (e.g. 42.375° at radix 4). -If you're using the lowercase family and want to skip the -radix entirely, two convenience macros cover pure integer degrees: +**Integer-degree macros** (`FR_SinI`, `FR_CosI`, `FR_TanI`) +take plain integer degrees -- the trailing *I* denotes +*integer*. These remain unchanged: -| Macro | Expansion | -| --- | --- | -| `fr_cos_deg(deg)` | `fr_cos_bam(FR_DEG2BAM(deg))` | -| `fr_sin_deg(deg)` | `fr_sin_bam(FR_DEG2BAM(deg))` | +| Symbol | Signature | Kind | +| --- | --- | --- | +| `FR_SinI` | `FR_SinI(deg)` -> `s32` (s15.16) | Macro: `fr_sin_bam(FR_DEG2BAM(deg))`. Zero-cost inline. | +| `FR_CosI` | `FR_CosI(deg)` -> `s32` (s15.16) | Macro: `fr_cos_bam(FR_DEG2BAM(deg))`. | +| `FR_TanI` | `s32 FR_TanI(s16 deg)` | Function. 
Returns at radix 16; saturates to `±INT32_MAX` near 90° / 270°. | + +**Legacy aliases.** The uppercase `FR_Sin`, `FR_Cos`, and +`FR_Tan` macros still work -- they map directly to +`fr_sin_deg`, `fr_cos_deg`, and `fr_tan_deg` respectively. +New code should use the `fr_` names. ## Inverse trigonometry diff --git a/docs/building.md b/docs/building.md index da61b4f..4a17c1a 100644 --- a/docs/building.md +++ b/docs/building.md @@ -96,7 +96,7 @@ binaries to keep compile times low: | Binary | What it checks | | --- | --- | | `test_basic` | Radix conversions, `FR_ADD`, `FR_FixMuls`, rounding. | -| `test_trig` | Integer-degree trig (`FR_Sin` et al.). | +| `test_trig` | Integer-degree trig (`fr_sin_deg` et al.). | | `test_trig_radians` | Radian / BAM trig and the v2 `fr_sin` API. | | `test_log_exp` | Log base 2 / ln / log10 and their inverses. | | `test_2d` | 2D transforms, determinants, inverses. | diff --git a/docs/examples.md b/docs/examples.md index 1716efd..d07a477 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -57,15 +57,15 @@ int main(void) ## 2. Trig — integer degrees vs radian vs BAM FR_Math supports three angle conventions and this example hits -all three: integer degrees through the legacy -`FR_Sin` / `FR_Cos` API, the radian-native +all three: fixed-point degrees through the +`fr_sin_deg` / `fr_cos_deg` API, the radian-native `fr_sin` / `fr_cos` (radian at a chosen input radix), and BAM-native `fr_sin_bam` / `fr_cos_bam`. All three paths feed the same 129-entry quadrant cosine table under the hood and should produce nearly identical results. -*Caveats:* the `radix` parameter on `FR_Sin(deg, radix)` is +*Caveats:* the `radix` parameter on `fr_sin_deg(deg, radix)` is the radix of the *degree input*, not the output. All sin/cos functions return **s15.16** — that is, `s32` at radix 16, where 1.0 = 65536 (`FR_TRIG_ONE`). 
The values compared below diff --git a/docs/fixed-point-primer.md b/docs/fixed-point-primer.md index ab79007..489a127 100644 --- a/docs/fixed-point-primer.md +++ b/docs/fixed-point-primer.md @@ -862,8 +862,8 @@ generation of each symbol: | Prefix | What it is | Example | | --- | --- | --- | | `FR_XXX()` | `UPPERCASE` macro — inline, zero call overhead. | `FR_ADD`, `FR_ABS`, `FR2I` | -| `FR_Xxx()` | Mixed-case C function — the classic v1 API. Integer-degree trig and related. | `FR_Sin`, `FR_log2`, `FR_sqrt` | -| `fr_xxx()` | Lowercase C function — v2 additions (radian / BAM trig, wave generators, ADSR). | `fr_sin`, `fr_wave_tri`, `fr_adsr_step` | +| `FR_Xxx()` | Mixed-case C function — the classic v1 API. Log, sqrt, inverse trig, and related. | `FR_log2`, `FR_sqrt`, `FR_atan2` | +| `fr_xxx()` | Lowercase C function — v2 API (degree/radian/BAM trig, wave generators, ADSR). `fr_sin_deg`, `fr_cos_deg`, `fr_tan_deg` are the current degree-based trig names. `FR_Sin`/`FR_Cos`/`FR_Tan` remain as legacy aliases. | `fr_sin_deg`, `fr_sin`, `fr_wave_tri`, `fr_adsr_step` | | `s8, s16, s32` | Signed integer typedefs (aliases for `int8_t`, `int16_t`, `int32_t`). | — | | `u8, u16, u32` | Unsigned integer typedefs. | — | @@ -923,7 +923,7 @@ explicitly mask off the top bits (and handle negative values) before the quadrant extraction (`bam >> 14`) made any sense. You would have traded one free operation for two slow ones on every sample, just to get back the same behavior. So instead, the public -trig entry points (`FR_CosI`, `FR_Cos`, `fr_cos`, and friends) +trig entry points (`FR_CosI`, `fr_cos_deg`, `fr_cos`, and friends) *all* take signed angles — in degrees, fixed-radix degrees, or radians — and only the internal `fr_cos_bam` / `fr_sin_bam` primitives see the `u16`. 
In practice you will never construct a diff --git a/keywords.txt b/keywords.txt index 1ab2703..f7e784e 100644 --- a/keywords.txt +++ b/keywords.txt @@ -14,9 +14,9 @@ fr_adsr_t KEYWORD1 FR_FixMuls KEYWORD2 FR_FixMulSat KEYWORD2 FR_FixAddSat KEYWORD2 -FR_Cos KEYWORD2 -FR_Sin KEYWORD2 -FR_Tan KEYWORD2 +fr_sin_deg KEYWORD2 +fr_cos_deg KEYWORD2 +fr_tan_deg KEYWORD2 FR_TanI KEYWORD2 FR_acos KEYWORD2 FR_asin KEYWORD2 @@ -99,3 +99,6 @@ FR_SLOG2E LITERAL1 FR_SrLOG2E LITERAL1 FR_SLOG2_10 LITERAL1 FR_SrLOG2_10 LITERAL1 +FR_Cos LITERAL1 +FR_Sin LITERAL1 +FR_Tan LITERAL1 diff --git a/makefile b/makefile index eb8a9fc..83a4e64 100644 --- a/makefile +++ b/makefile @@ -57,6 +57,10 @@ help: @echo " size-report Multi-architecture size report" @echo " size-simple Size report for current platform" @echo "" + @echo "Tools:" + @echo " tools Build diagnostic tools" + @echo " trig-neighborhood Build function neighborhood explorer" + @echo "" @echo "Maintenance:" @echo " clean Remove build artifacts" @echo " cleanall Remove build artifacts and backups" @@ -211,6 +215,19 @@ size-simple: lib ls -lh $(BUILD_DIR)/*.o; \ fi +# Tools +TOOLS_DIR = tools + +.PHONY: tools +tools: dirs trig-neighborhood + +.PHONY: trig-neighborhood +trig-neighborhood: $(BUILD_DIR)/trig_neighborhood + +$(BUILD_DIR)/trig_neighborhood: $(TOOLS_DIR)/trig_neighborhood.cpp $(SRC_DIR)/FR_math.c $(HEADERS) + $(CC) -I$(SRC_DIR) $(LIB_WARN) -Os -c $(SRC_DIR)/FR_math.c -o $(BUILD_DIR)/tool_FR_math.o + $(CXX) $(CXXFLAGS) $(TOOLS_DIR)/trig_neighborhood.cpp $(BUILD_DIR)/tool_FR_math.o $(LDFLAGS) -o $@ + # Clean .PHONY: clean clean: diff --git a/pages/guide/api-reference.html b/pages/guide/api-reference.html index 328d2f4..3e8a17f 100644 --- a/pages/guide/api-reference.html +++ b/pages/guide/api-reference.html @@ -507,7 +507,7 @@

    Why u16 for BAM (not s32)?

    “But what if I want to pass in any signed angle without worrying about conversion?” That is exactly what -FR_CosI(deg), FR_Cos(deg, radix), and +FR_CosI(deg), fr_cos_deg(deg, radix), and fr_cos(rad, radix) are for. All three take signed inputs and reduce them to BAM for you. The only place you actually see a u16 is at the internal @@ -628,16 +628,18 @@

    Radian-native

    -

    Integer-degree wrappers (legacy API)

    +

    Degree wrappers

    -

    The uppercase legacy API takes an angle in degrees. +

    The degree API takes an angle in degrees. FR_SinI, FR_CosI and FR_TanI take plain integer degrees — the trailing I denotes -integer. The variants without the I -suffix (FR_Sin, FR_Cos, FR_Tan) -accept a radix argument and treat the degree value as -fixed-point, so you can pass fractional degrees like -42.375°.

    +integer. The primary degree wrappers with a +radix argument are fr_sin_deg, +fr_cos_deg, and fr_tan_deg — they +treat the degree value as fixed-point, so you can pass +fractional degrees like 42.375°. The uppercase names +FR_Sin, FR_Cos, and FR_Tan +are legacy aliases that map to the same functions.

    @@ -645,24 +647,23 @@

    Integer-degree wrappers (legacy API)

    - - - + + + + + +
    SymbolSignatureKind
    FR_SinIFR_SinI(deg)s32 (s15.16)Macro: fr_sin_bam(FR_DEG2BAM(deg)). Zero-cost inline.
    FR_CosIFR_CosI(deg)s32 (s15.16)Macro: fr_cos_bam(FR_DEG2BAM(deg)).
    FR_TanIs32 FR_TanI(s16 deg)Function. Returns at radix 16; saturates to ±INT32_MAX near 90° / 270°.
    FR_Sins32 FR_Sin(s16 deg, u16 radix)deg is fixed-point at radix. Returns s15.16.
    FR_Coss32 FR_Cos(s16 deg, u16 radix)Same.
    FR_Tans32 FR_Tan(s16 deg, u16 radix)Returns at radix 16; saturates to ±INT32_MAX near 90° / 270°.
    fr_sin_degs32 fr_sin_deg(s32 deg, u16 radix)Function. deg is fixed-point at radix. Returns s15.16.
    fr_cos_degs32 fr_cos_deg(s32 deg, u16 radix)Function. Same.
    fr_tan_degs32 fr_tan_deg(s32 deg, u16 radix)Function. Returns at radix 16; saturates to ±INT32_MAX near 90° / 270°.
    FR_SinFR_Sin(deg, radix)Legacy macro alias for fr_sin_deg.
    FR_CosFR_Cos(deg, radix)Legacy macro alias for fr_cos_deg.
    FR_TanFR_Tan(deg, radix)Legacy macro alias for fr_tan_deg.
    -

    Degree wrappers on the BAM path

    +

    Degree-to-BAM path

    -

    If you’re using the lowercase family and want to skip the -radix entirely, two convenience macros cover pure integer degrees:

    - - - - - - - -
    MacroExpansion
    fr_cos_deg(deg)fr_cos_bam(FR_DEG2BAM(deg))
    fr_sin_deg(deg)fr_sin_bam(FR_DEG2BAM(deg))
    +

    fr_cos_deg, fr_sin_deg, and +fr_tan_deg are now functions (not macros). They accept +a fixed-point degree value with a radix argument, +convert to BAM internally, and call the BAM core. For plain integer +degrees with no radix parameter, use FR_CosI / +FR_SinI / FR_TanI instead.

    Inverse trigonometry

    diff --git a/pages/guide/building.html b/pages/guide/building.html index 246afbb..2ea77bc 100644 --- a/pages/guide/building.html +++ b/pages/guide/building.html @@ -114,7 +114,7 @@

    The test suite

    BinaryWhat it checks test_basicRadix conversions, FR_ADD, FR_FixMuls, rounding. -test_trigInteger-degree trig (FR_Sin et al.). +test_trigInteger-degree trig (fr_sin_deg et al.). test_trig_radiansRadian / BAM trig and the v2 fr_sin API. test_log_expLog base 2 / ln / log10 and their inverses. test_2d2D transforms, determinants, inverses. diff --git a/pages/guide/examples.html b/pages/guide/examples.html index fec3f91..71882f9 100644 --- a/pages/guide/examples.html +++ b/pages/guide/examples.html @@ -70,8 +70,9 @@

    1. Basic radix conversion

    2. Trig — integer degrees vs radian vs BAM

    FR_Math supports three angle conventions and this example hits -all three: integer degrees through the legacy -FR_Sin / FR_Cos API, the radian-native +all three: integer degrees through +fr_sin_deg / fr_cos_deg (or the legacy +aliases FR_Sin / FR_Cos), the radian-native fr_sin / fr_cos (radian at a chosen input radix), and BAM-native fr_sin_bam / fr_cos_bam. All three paths feed the same 129-entry @@ -79,7 +80,7 @@

    2. Trig — integer degrees vs radian vs BAM

    identical results.

    Caveats: the radix parameter on -FR_Sin(deg, radix) is the radix of the degree +fr_sin_deg(deg, radix) is the radix of the degree input, not the output. All sin/cos functions return s15.16 — that is, s32 at radix 16, where 1.0 = 65536 (FR_TRIG_ONE). The values compared diff --git a/pages/guide/fixed-point-primer.html b/pages/guide/fixed-point-primer.html index ce41074..73325ef 100644 --- a/pages/guide/fixed-point-primer.html +++ b/pages/guide/fixed-point-primer.html @@ -911,8 +911,8 @@

    FR_Math’s naming conventions

    PrefixWhat it isExample FR_XXX()UPPERCASE macro — inline, zero call overhead.FR_ADD, FR_ABS, FR2I -FR_Xxx()Mixed-case C function — the classic v1 API. Integer-degree trig and related.FR_Sin, FR_log2, FR_sqrt -fr_xxx()Lowercase C function — v2 additions (radian / BAM trig, wave generators, ADSR).fr_sin, fr_wave_tri, fr_adsr_step +FR_Xxx()Mixed-case C function or legacy alias. FR_Sin/FR_Cos/FR_Tan are legacy aliases for fr_sin_deg/fr_cos_deg/fr_tan_deg.FR_log2, FR_sqrt, FR_Sin (legacy) +fr_xxx()Lowercase C function — the current API for degree wrappers, radian / BAM trig, wave generators, ADSR.fr_sin_deg, fr_cos_deg, fr_sin, fr_wave_tri, fr_adsr_step s8, s16, s32Signed integer typedefs (aliases for int8_t, int16_t, int32_t).— u8, u16, u32Unsigned integer typedefs.— @@ -980,7 +980,7 @@

    Angle representations

    (bam >> 14) made any sense. You would have traded one free operation for two slow ones on every sample, just to get back the same behavior. So instead, the public trig entry points -(FR_CosI, FR_Cos, fr_cos, and +(FR_CosI, fr_cos_deg, fr_cos, and friends) all take signed angles — in degrees, fixed-radix degrees, or radians — and only the internal fr_cos_bam / fr_sin_bam primitives see diff --git a/pages/index.html b/pages/index.html index 62e6b7a..0040cd4 100644 --- a/pages/index.html +++ b/pages/index.html @@ -47,34 +47,34 @@

    Measured accuracy

    Errors below are measured at Q16.16 (s15.16). All functions accept any radix — Q16.16 is just the reference point for the table. See the TDD -report for sweeps at radixes 8, 12, 16, and 24. -Percent errors skip expected values near zero (|expected| < 0.01).

    - - - - - - - - - - - - - - - - - - - - - - - - - -
    FunctionMax err (%)Avg err (%)Note
    sin/cos (BAM)0.16460.005865536 BAM; 129-entry quadrant table
    sin/cos (deg)0.59090.009165536-pt deg r7 + specials
    sin/cos (rad)0.16460.005965536-pt rad r16
    tan (BAM)0.17040.006565536 BAM; 65-entry octant table
    tan (deg)0.60000.014065536-pt deg r7 + specials
    tan (rad)0.17040.006565536-pt rad r16
    asin / acos1.97760.030865536-pt; sqrt approx near boundary
    atan20.49530.023865536x5 radii; asin/acos+hypot_fast8
    atan0.29850.015320001-pt sweep [-10,10]; via FR_atan2
    sqrt0.00030.0000Round-to-nearest
    log20.24790.004565-entry mantissa table
    pow20.13730.005765-entry fraction table
    ln, log100.00150.0004Via FR_MULK28 from log2
    exp0.07190.0051FR_MULK28 + FR_pow2
    exp_fast0.07190.0064Shift-only scaling
    pow100.11630.0075FR_MULK28 + FR_pow2
    pow10_fast0.11630.0100Shift-only scaling
    hypot (exact)0.00010.000064-bit intermediate
    hypot_fast8 (8-seg)0.09770.0508Shift-only, no multiply
    +report for sweeps at radixes 8, 12, 16, and 24.

    + + + + + + + + + + + + + + + + + + + + + + + + + +
    FunctionMax err (%)*Avg err (%)Note
    sin/cos (BAM)0.45780.0076fr_sin_bam/fr_cos_bam direct; 129-entry table
    sin/cos (deg)0.45780.0076FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM
    sin/cos (rad)0.61040.0085fr_sin/fr_cos via fr_rad_to_bam ±2π r16
    tan (BAM)0.58230.0008fr_tan_bam 65536-pt full; ±maxint at poles
    tan (deg)0.53110.0008FR_Tan ±360° s15.16 full; sat at poles
    tan (rad)13.40690.0029fr_tan ±2π r16 full; sat at poles
    asin / acos0.87430.030165536-pt; sqrt approx near boundary
    atan20.51000.023765536x5 radii; asin/acos+hypot_fast8
    atan0.33900.015420001-pt full sweep [-10,10]; via FR_atan2
    sqrt0.02390.0000Round-to-nearest
    log20.02860.002965-entry mantissa table
    pow20.00190.000365-entry fraction table
    ln, log100.00040.0000Via FR_MULK28 from log2
    exp0.00030.0000FR_MULK28 + FR_pow2
    exp_fast0.00090.0001Shift-only scaling
    pow100.00070.0000FR_MULK28 + FR_pow2
    pow10_fast0.00280.0002Shift-only scaling
    hypot (exact)0.00000.000064-bit intermediate
    hypot_fast8 (8-seg)0.09150.0320Shift-only, no multiply
    +

    *Relative error; reference clamped to 1% of full-scale output.

    What’s in the box

    @@ -84,8 +84,8 @@

    What’s in the box

    ArithmeticFR_ADD, FR_SUB, FR_DIV, FR_DIV32, FR_MOD, FR_FixMuls, FR_FixMulSat, FR_CHRDX UtilityFR_MIN, FR_MAX, FR_CLAMP, FR_ABS, FR_SGN -Trig (integer deg)FR_Sin, FR_Cos, FR_Tan, FR_SinI, FR_CosI, FR_TanI -Trig (radian/BAM)fr_sin, fr_cos, fr_tan, fr_sin_bam, fr_cos_bam, fr_tan_bam, fr_sin_deg, fr_cos_deg +Trig (integer deg)fr_sin_deg, fr_cos_deg, fr_tan_deg, FR_SinI, FR_CosI, FR_TanI +Trig (radian/BAM)fr_sin, fr_cos, fr_tan, fr_sin_bam, fr_cos_bam, fr_tan_bam Inverse trigFR_atan, FR_atan2, FR_asin, FR_acos Log / expFR_log2, FR_ln, FR_log10, FR_pow2, FR_EXP, FR_POW10, FR_EXP_FAST, FR_POW10_FAST, FR_MULK28 RootsFR_sqrt, FR_hypot, FR_hypot_fast8 @@ -183,20 +183,25 @@

    Quick taste

    * conversions and simple arithmetic: * I2FR, FR2I, FR_NUM, FR_ADD, FR_DIV, FR_ABS, FR_CHRDX, FR_EXP ... * - * MixedCase FR_ names are functions — they contain loops, tables, or - * multi-step algorithms where inlining would waste ROM: - * FR_Cos, FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ... + * MixedCase FR_ names are legacy functions — they still work but + * map to the current lowercase names: + * FR_Cos → fr_cos_deg, FR_Sin → fr_sin_deg, FR_Tan → fr_tan_deg + * + * lowercase fr_ names are the current API (degree wrappers, radian + * trig, BAM trig, wave generators, ADSR envelopes): + * fr_cos_deg, fr_sin_deg, fr_tan_deg, fr_sin, fr_cos, fr_tan, + * fr_wave_tri, fr_adsr_step ... * - * lowercase fr_ names are v2 functions (radian trig, wave generators, - * ADSR envelopes): - * fr_sin, fr_cos, fr_tan, fr_wave_tri, fr_adsr_step ... + * Other MixedCase / lowercase FR_ names are functions with loops, + * tables, or multi-step algorithms: + * FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ... * * Some macros wrap functions: FR_EXP(x,r) scales x then calls * FR_pow2 — one-liner convenience, heavy lifting in the function. 
*/ /* ---- Math functions ---- */ -s32 c45 = FR_Cos(45, 0); /* cos(45°) = 0.7071 */ +s32 c45 = fr_cos_deg(45, 0); /* cos(45°) = 0.7071 */ s32 s30 = fr_sin(FR_numstr("0.5236", R), R); /* sin(0.5236 rad) */ s32 root2 = FR_sqrt(two, R); /* sqrt(2) = 1.4142 */ s32 angle = FR_atan2(I2FR(1,R), I2FR(1,R), R); /* atan2(1,1) rad */ diff --git a/scripts/accuracy_report.sh b/scripts/accuracy_report.sh index 1bd5745..f996ac1 100755 --- a/scripts/accuracy_report.sh +++ b/scripts/accuracy_report.sh @@ -86,12 +86,13 @@ patch_markdown() { return fi - # Build replacement block: sentinel + header + separator + data + sentinel + # Build replacement block: sentinel + header + separator + data + footnote + sentinel local replacement replacement=""$'\n' - replacement+="| Function | Max err (%) | Avg err (%) | Note |"$'\n' + replacement+="| Function | Max err (%)*| Avg err (%) | Note |"$'\n' replacement+="|---|---:|---:|---|"$'\n' replacement+="$DATA_ROWS"$'\n' + replacement+=$'\n'"*Relative error; reference clamped to 1% of full-scale output."$'\n' replacement+="" # Use perl to replace between sentinels @@ -137,11 +138,12 @@ patch_html() { local replacement replacement=""$'\n' replacement+=""$'\n' - replacement+=""$'\n' + replacement+=""$'\n' replacement+=""$'\n' replacement+="$html_rows"$'\n' replacement+=""$'\n' replacement+="
    FunctionMax err (%)Avg err (%)Note
    FunctionMax err (%)*Avg err (%)Note
    "$'\n' + replacement+="

    *Relative error; reference clamped to 1% of full-scale output.

    "$'\n' replacement+="" perl -0777 -i -pe " diff --git a/src/FR_math.c b/src/FR_math.c index 3a7a1d4..45c75c2 100644 --- a/src/FR_math.c +++ b/src/FR_math.c @@ -37,85 +37,208 @@ #endif /*======================================================= - * BAM-native trig: fr_cos_bam, fr_sin_bam, fr_cos, fr_sin, fr_tan + * Full-precision radian/degree → BAM conversion helpers + * + * rad_to_bam_full(r) returns a full s32 BAM value where: + * upper 16 bits = integer BAM (the u16 table index) + * lower 16 bits = sub-BAM fractional part + * Input r must already be normalized to radix 16 and reduced to [-pi, pi]. + * + * The shift terms match FR_RAD2BAM (10 terms, ~21-bit accuracy) but are + * reordered so intermediate sums stay within s32 for |r| <= pi at r16. + */ +static s32 rad_to_bam_full(s32 r) +{ + /* 10 terms: 65536/(2*pi) ≈ 10430.37835... + * 2^13 + 2^11 + 2^7 + 2^6 - 2 + 0.5 - 0.125 + 2^-8 - 2^-11 - 2^-14 + * = 10430.378357 (~21-bit accuracy) + * Terms reordered: interleave negatives early to keep all intermediate + * sums within s32 for |r| <= pi at r16 (max result ≈ 2^31 - 4K). */ + return (r<<13)-(r<<1)+(r<<11)-(r>>3)+(r<<7)+(r<<6)+(r>>1)+(r>>8)-(r>>11)-(r>>14); +} + +/* deg_to_bam_full(d) — same idea for degrees. + * Input d must already be normalized to radix 16 and reduced to [-90, 90). + * Returns full s32 BAM (upper 16 = integer BAM, lower 16 = sub-BAM). + * 7 terms, ~18-bit accuracy matching FR_DEG2BAM. */ +static s32 deg_to_bam_full(s32 d) +{ + return (d<<7)+(d<<6)-(d<<3)-(d<<1)+(d>>5)+(d>>6)-(d>>9); +} + +/* Normalize a fixed-radix value to radix 16. */ +static s32 normalize_to_r16(s32 val, u16 radix) +{ + return (radix > 16) ? (val >> (radix - 16)) + : (radix < 16) ? (val << (16 - radix)) + : val; +} + +/* Reduce non-negative radian (at r16) to [0, 2*pi). + * Helper used by range_reduce_rad and the near-pi small-angle paths. 
*/ +static s32 reduce_to_2pi(s32 r) +{ + const s32 two_pi = FR_TWO_PI(16); /* 411775 */ + if (r > (two_pi << 1)) + r -= (r / two_pi) * two_pi; + else if (r > two_pi) + r -= two_pi; + return r; +} + +/* Range-reduce radian value (at r16, non-negative) to [-pi, pi]. + * Caller guarantees r >= 0 (sign is handled externally). */ +static s32 range_reduce_rad(s32 r) +{ + r = reduce_to_2pi(r); + if (r > FR_PI(16)) + r -= FR_TWO_PI(16); + return r; +} + +/* fr_rad_to_bam — overflow-safe radian to u16 BAM conversion. + * Normalizes to r16, reduces via positive-only path, applies shift-only multiply. + * Handles inputs beyond ±2*pi with modulus (slow path). */ +u16 fr_rad_to_bam(s32 rad, u16 radix) +{ + s32 r = normalize_to_r16(rad, radix); + /* BAM wraps naturally in u16, but range_reduce expects non-negative. + * For negative r: bam(-x) = -bam(x) mod 65536, so negate and let u16 wrap. */ + s32 sign = 1; + if (r < 0) { r = -r; sign = -1; } + r = range_reduce_rad(r); + s32 bam_full = rad_to_bam_full(r); + if (sign < 0) bam_full = -bam_full; + return (u16)((bam_full + (1 << 15)) >> 16); +} + +/* fr_deg_to_bam — overflow-safe degree to u16 BAM conversion. + * Normalizes to r16, reduces to [-90, 90) with quadrant offset. */ +u16 fr_deg_to_bam(s32 deg, u16 radix) +{ + s32 d = normalize_to_r16(deg, radix); + + /* Reduce to [-180, 180) */ + if (d >= FR_D360_R16 || d < -FR_D360_R16) { + s32 n = d / FR_D360_R16; + d -= n * FR_D360_R16; + } + if (d >= FR_D180_R16) d -= FR_D360_R16; + if (d < -FR_D180_R16) d += FR_D360_R16; + + /* Reduce to [-90, 90) with BAM quadrant offset */ + u16 offset = 0; + if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = 32768; } + else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = 32768; } + + return (u16)(offset + (u16)((deg_to_bam_full(d) + (1 << 15)) >> 16)); +} + +/*======================================================= + * BAM-native trig: fr_sin_bam, fr_cos_bam, fr_cos, fr_sin, fr_tan * * Internal model: every angle is reduced to a u16 BAM value. 
The top 2 bits * select the quadrant, the bottom 14 bits are the in-quadrant position. Odd * quadrants (1, 3) reverse the in-quadrant index so the table is always read - * in the same direction. Quadrants 1 and 2 get their sign flipped at the - * end. - * - * Within each quadrant, the upper FR_TRIG_TABLE_BITS bits of the - * in-quadrant value index the table; the lower FR_TRIG_FRAC_BITS bits drive - * round-to-nearest linear interpolation between adjacent table entries. - * - * The last entry (table[FR_TRIG_TABLE_SIZE-1] = 0) means the - * interpolation at the very edge of the quadrant never reads out of bounds. - * - * Rounding: we interpolate as - * v = lo - ((d * frac + HALF) >> FRAC_BITS) - * where d = lo - hi (which is >= 0 because cos is monotonically decreasing - * on [0, pi/2]). Using the subtract form guarantees the argument of >> is - * always non-negative, so the behavior is portable C89 (no reliance on - * implementation-defined right-shift of negative integers) and the +HALF - * gives unambiguous round-half-up. Max error vs the true cos is ~1 LSB of - * s0.15 (~3e-5 absolute); mean error ~0 (no bias). + * in the same direction. + * + * The table is a 129-entry SINE quadrant (ascending: 0 at index 0, 32768 at + * index 128). After mirroring, small full_pos → small output (near zero), + * which enables a cheap small-angle approximation: sin(θ) ≈ θ for angles + * below one table step (~0.7°). This eliminates table quantization error + * in the region where it matters most. + * + * Sign rule: quadrants 2 and 3 negate the result. + * Mirror rule: quadrants 1 and 3 flip the in-quadrant position. */ -s32 fr_cos_bam(u16 bam) +s32 fr_sin_bam(u16 bam) { - u32 q = ((u32)bam >> 14) & 0x3; /* top 2 bits = quadrant */ - u32 inq = (u32)bam & (FR_TRIG_QUADRANT - 1); /* bottom 14 bits */ - u32 idx, frac; - s32 lo, hi, d, v; - - /* Exact cardinal angles: bam=0 → 1.0, bam=16384 → 0, etc. 
*/ - if (inq == 0) - { - if (q == 0) return FR_TRIG_ONE; /* 0° → 1.0 */ - if (q == 2) return -FR_TRIG_ONE; /* 180° → -1.0 */ - return 0; /* 90° or 270° → 0 */ + u32 q = ((u32)bam >> 14) & 0x3; /* top 2 bits = quadrant */ + u32 inq = (u32)bam & (FR_TRIG_QUADRANT - 1); /* bottom 14 bits */ + + /* Exact cardinal angles */ + if (inq == 0) { + if (q == 0 || q == 2) return 0; /* 0° or 180° → 0 */ + if (q == 1) return FR_TRIG_ONE; /* 90° → 1.0 */ + return -FR_TRIG_ONE; /* 270° → -1.0 */ } + /* Odd quadrants mirror: read table from the far end */ if (q == 1 || q == 3) - inq = FR_TRIG_QUADRANT - inq; /* mirror across pi/2 */ - - idx = inq >> FR_TRIG_FRAC_BITS; /* table index [0..SIZE-1] */ - frac = inq & FR_TRIG_FRAC_MASK; /* interp fraction */ - lo = gFR_COS_TAB_Q[idx]; - hi = gFR_COS_TAB_Q[idx + 1]; - d = lo - hi; /* >= 0: cos monotonic */ - v = lo - (((d * (s32)frac) + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS); - - if (v < 0x40) { - /* Near zero crossing: redo interpolation with 3 extra bits of - * precision to reduce rounding error when the result is small. */ - s32 lo3 = (s32)gFR_COS_TAB_Q[idx] << 3; - s32 d3 = lo3 - ((s32)gFR_COS_TAB_Q[idx + 1] << 3); - v = lo3 - (((d3 * (s32)frac) + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS); - v = (v + 2) >> 2; /* s0.18 → s15.16 with rounding */ + inq = FR_TRIG_QUADRANT - inq; + + s32 v; + + /* Small-angle approximation: sin(θ) ≈ θ for inq < 128 (one table step). + * θ_rad = inq * (π/2) / 16384. Output = θ * 65536 = inq * FR_kQ2RAD / 16384. + * Max inq=127: 127 * 102944 / 16384 = 798. Error: θ³/6 < 3e-7 << 1 LSB. 
*/ + if (inq < FR_TRIG_FRAC_MAX) { + v = (s32)(((u32)inq * 102944u + 8192u) >> 14); } else { - v <<= 1; /* s0.15 → s15.16 */ + /* Table lookup with 7-bit interpolation fraction */ + u32 idx = inq >> FR_TRIG_FRAC_BITS; + u32 frac = inq & FR_TRIG_FRAC_MASK; + s32 lo = (s32)gFR_SIN_TAB_Q[idx]; + s32 hi = (s32)gFR_SIN_TAB_Q[idx + 1]; + v = lo + (((hi - lo) * (s32)frac + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS); + v <<= 1; /* u0.15 → s15.16 */ } - return (q == 1 || q == 2) ? -v : v; + return (q >= 2) ? -v : v; } -s32 fr_sin_bam(u16 bam) +s32 fr_cos_bam(u16 bam) { - /* sin(x) = cos(x - pi/2) = cos(bam - 16384). The u16 wraparound makes - * this completely free. - */ - return fr_cos_bam((u16)(bam - FR_BAM_QUADRANT)); + /* cos(x) = sin(x + pi/2) = sin(bam + 16384). u16 wraparound is free. */ + return fr_sin_bam((u16)(bam + FR_BAM_QUADRANT)); } s32 fr_cos(s32 rad, u16 radix) { + if (rad == 0) return FR_TRIG_ONE; + s32 r = normalize_to_r16(rad, radix); + if (r < 0) r = -r; + r = reduce_to_2pi(r); + /* Near π/2 or 3π/2 (cos=0 crossings): cos(π/2+δ) = -sin(δ) ≈ -δ, + * cos(3π/2+δ) = sin(δ) ≈ δ. */ + s32 delta = r - FR_HALF_PI(16); + if (delta >= -256 && delta <= 256) + return -delta; + delta = r - FR_THREE_HALF_PI(16); + if (delta >= -256 && delta <= 256) + return delta; return fr_cos_bam(fr_rad_to_bam(rad, radix)); } s32 fr_sin(s32 rad, u16 radix) { - return fr_sin_bam(fr_rad_to_bam(rad, radix)); + if (rad == 0) return 0; + s32 r = normalize_to_r16(rad, radix); + s32 sign = 1; + if (r < 0) { r = -r; sign = -1; } + r = reduce_to_2pi(r); + /* Near 0 after reduction: sin(δ) ≈ δ */ + if (r < 256) { + s32 v = r; + return (sign < 0) ? -v : v; + } + /* Near π: sin(π + δ) = -sin(δ) ≈ -δ */ + s32 delta = r - FR_PI(16); + if (delta >= -256 && delta <= 256) { + s32 v = -delta; + return (sign < 0) ? -v : v; + } + /* Near 2π: sin(2π - δ) = -sin(δ) ≈ -δ, but δ = 2π - r */ + delta = FR_TWO_PI(16) - r; + if (delta >= 0 && delta < 256) { + s32 v = -delta; + return (sign < 0) ? 
-v : v; + } + /* Main path: reduce to [-π, π], convert to u16 BAM, table lookup */ + if (r > FR_PI(16)) r -= FR_TWO_PI(16); + u16 bam = (u16)((rad_to_bam_full(r) + (1 << 15)) >> 16); + s32 v = fr_sin_bam(bam); + return (sign < 0) ? -v : v; } /*======================================================= @@ -205,59 +328,219 @@ s32 fr_tan_bam(u16 bam) return (sign < 0) ? -raw : raw; } -/* fr_tan: returns tan at s15.16 (radix 16). Uses BAM-native table. - * At exact poles, fr_tan_bam's sign convention is based on BAM quadrant - * which loses the original approach direction. Fix up: if the result - * saturates, the sign should match the sign of the radian input. */ -s32 fr_tan(s32 rad, u16 radix) +/* fr_tan — radian-input tangent with full sub-BAM precision. + * + * Goes directly to the 65-entry octant tangent table with 16-bit + * interpolation precision. Sign from quadrant, magnitude from table. + * No s64 intermediates. One 32-bit division in the second-octant path. + * + * Architecture: + * 1. Sign: determined by quadrant of the BAM position (Q1/Q3=+, Q2/Q4=-) + * 2. Magnitude: from octant table lookup + reciprocal identity + * - First octant [0,45°): direct table lerp + * - Second octant [45°,90°): 1/tan(90°-x) via reciprocal + * 3. Return sign * magnitude */ + +/* Internal: given a full s32 BAM, compute |tan| directly from the table. + * Returns the unsigned magnitude (always >= 0). 
*/ +static s32 tan_mag_from_bam_full(s32 bam_full) { - s32 result = fr_tan_bam(fr_rad_to_bam(rad, radix)); - if (result == FR_TRIG_MAXVAL && rad < 0) - return -FR_TRIG_MAXVAL; - if (result == -FR_TRIG_MAXVAL && rad > 0) + u16 bam0 = (u16)(bam_full >> 16); + u32 frac_sub = (u32)bam_full & 0xFFFFu; + + u32 q = ((u32)bam0 >> 14) & 0x3u; + u32 inq = (u32)bam0 & 0x3FFFu; + + /* Exact zeros: tan(0°) = tan(180°) = 0 */ + if (inq == 0 && frac_sub == 0 && (q == 0 || q == 2)) + return 0; + + /* Exact poles: tan(90°) = tan(270°) → saturate */ + if (inq == 0 && frac_sub == 0 && (q == 1 || q == 3)) return FR_TRIG_MAXVAL; - return result; + + /* Mirror odd quadrants (Q1, Q3) into the [0, 90°) range. + * After this, full_pos represents distance from the nearest zero. */ + u32 full_pos; + if (q == 1 || q == 3) + full_pos = ((u32)(0x4000u - inq) << 16) - frac_sub; + else + full_pos = ((u32)inq << 16) + frac_sub; + + /* Split at octant boundary (45° = 8192 BAM = 8192*65536 sub-BAM) */ + s32 raw; + if (full_pos < ((u32)FR_TAN_OCTANT << 16)) { + /* First octant [0, 45°): direct table lookup. + * 64 table intervals, each 2^23 sub-BAM units wide. */ + u32 idx = full_pos >> 23; + u32 frac16 = (full_pos >> 7) & 0xFFFFu; + + s32 lo = (s32)gFR_TAN_TAB_O[idx]; + s32 hi = (s32)gFR_TAN_TAB_O[idx + 1]; + raw = lo + (s32)(((s32)(hi - lo) * (s32)frac16 + (1 << 15)) >> 16); + + if (raw < 0x40) { + /* Near zero: redo with 4 extra bits of precision */ + s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; + s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; + raw = lo4 + (s32)(((s32)(hi4 - lo4) * (s32)frac16 + (1 << 15)) >> 16); + raw = (raw + 4) >> 3; /* u0.19 → s15.16 with rounding */ + } else { + raw <<= 1; /* u0.15 → s15.16 */ + } + } else { + /* Second octant [45°, 90°): tan(x) = 1 / tan(90° - x). + * Complement = distance from pole, in first-octant range. 
*/ + u32 comp = ((u32)FR_TRIG_QUADRANT << 16) - full_pos; + + u32 idx = comp >> 23; + u32 frac16 = (comp >> 7) & 0xFFFFu; + + s32 lo = (s32)gFR_TAN_TAB_O[idx]; + s32 hi = (s32)gFR_TAN_TAB_O[idx + 1]; + raw = lo + (s32)(((s32)(hi - lo) * (s32)frac16 + (1 << 15)) >> 16); + + if (raw < 0x40) { + /* Near pole: redo with 4 extra bits then reciprocal */ + s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; + s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; + s32 raw_hp = lo4 + (s32)(((s32)(hi4 - lo4) * (s32)frac16 + (1 << 15)) >> 16); + if (raw_hp < 32) + raw = FR_TRIG_MAXVAL; + else + raw = (s32)((0x80000000u / (u32)raw_hp) << 4); + } else { + raw = (s32)(0x80000000u / (u32)raw); + } + } + return raw; +} + +s32 fr_tan(s32 rad, u16 radix) +{ + if (rad == 0) return 0; + /* tan(-x) = -tan(x): factor out sign, reduce positive */ + s32 r = normalize_to_r16(rad, radix); + s32 tan_sign = 1; + if (r < 0) { r = -r; tan_sign = -1; } + r = reduce_to_2pi(r); + /* Near-π small angle: tan(π + δ) = tan(δ) ≈ δ. */ + s32 delta = r - FR_PI(16); + if (delta >= -256 && delta <= 256) { + return (tan_sign < 0) ? -delta : delta; + } + /* Full pipeline */ + if (r > FR_PI(16)) + r -= FR_TWO_PI(16); + s32 bam_full = rad_to_bam_full(r); + + /* Sign from quadrant of the BAM position */ + u32 q = ((u32)((u16)(bam_full >> 16)) >> 14) & 0x3u; + s32 sign = (q == 1 || q == 3) ? -tan_sign : tan_sign; + + s32 mag = tan_mag_from_bam_full(bam_full); + return (sign < 0) ? -mag : mag; } /*======================================================= - * Integer-degree and fixed-radix-degree trig wrappers + * Degree-input trig: convert to u16 BAM via fr_deg_to_bam, then + * call the BAM-native functions. Cardinal angles are exact. */ -s32 FR_Cos(s32 deg, u16 radix) + +s32 fr_cos_deg(s32 deg, u16 radix) { - u16 bam = (radix == 0) ? 
FR_DEG2BAM_I(deg) : (u16)((FR_DEG2BAM(deg) + (1 << (radix - 1))) >> radix); - return fr_cos_bam(bam); + if (radix == 0) return fr_cos_bam(FR_DEG2BAM_I(deg)); + if (deg < 0) deg = -deg; + /* Exact cardinal angles */ + s32 frac_mask = (1 << radix) - 1; + if ((deg & frac_mask) == 0) { + s32 rem = (deg >> radix) % 360; + if (rem == 0) return FR_TRIG_ONE; + if (rem == 90) return 0; + if (rem == 180) return -FR_TRIG_ONE; + if (rem == 270) return 0; + } + /* Near 90° or 270° (cos=0 crossings): cos(90+δ) = -sin(δ) ≈ -δ·π/180, + * cos(270+δ) = sin(δ) ≈ δ·π/180. Avoids BAM rounding error at zero. */ + s32 d = normalize_to_r16(deg, radix); + if (d >= FR_D360_R16) { s32 n = d / FR_D360_R16; d -= n * FR_D360_R16; } + { + const s32 DEG_THRESH = 14000; /* ~0.21° at r16 */ + s32 delta = d - FR_D90_R16; + if (delta >= -DEG_THRESH && delta <= DEG_THRESH) { + s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16); + return -dr; + } + delta = d - (FR_D90_R16 + FR_D180_R16); + if (delta >= -DEG_THRESH && delta <= DEG_THRESH) { + s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16); + return dr; + } + } + return fr_cos_bam(fr_deg_to_bam(deg, radix)); } -s32 FR_Sin(s32 deg, u16 radix) +s32 fr_sin_deg(s32 deg, u16 radix) { - u16 bam = (radix == 0) ? FR_DEG2BAM_I(deg) : (u16)((FR_DEG2BAM(deg) + (1 << (radix - 1))) >> radix); - return fr_sin_bam(bam); + if (radix == 0) return fr_sin_bam(FR_DEG2BAM_I(deg)); + s32 sign = 1; + if (deg < 0) { deg = -deg; sign = -1; } + /* Exact cardinal angles */ + s32 frac_mask = (1 << radix) - 1; + if ((deg & frac_mask) == 0) { + s32 rem = (deg >> radix) % 360; + if (rem == 0) return 0; + if (rem == 90) return (sign < 0) ? -FR_TRIG_ONE : FR_TRIG_ONE; + if (rem == 180) return 0; + if (rem == 270) return (sign < 0) ? FR_TRIG_ONE : -FR_TRIG_ONE; + } + s32 v = fr_sin_bam(fr_deg_to_bam(deg, radix)); + return (sign < 0) ? -v : v; } s32 FR_TanI(s32 deg) { - /* Exact pole: deg mod 180 == ±90. 
Sign matches input sign - * (positive deg → +MAXVAL, negative deg → -MAXVAL). */ + /* Exact pole: deg mod 180 == ±90. Sign matches input sign. */ s32 rem = deg % 180; if (rem == 90 || rem == -90) return (deg > 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; return fr_tan_bam(FR_DEG2BAM_I(deg)); } -s32 FR_Tan(s32 deg, u16 radix) +/* Internal: range-reduce degrees and produce full s32 BAM (used by fr_tan_deg). */ +static s32 range_reduce_deg_bam_full(s32 deg, u16 radix) { - /* Check for exact integer poles before using the shift-only DEG2BAM - * macro, which can map to the wrong BAM quadrant for large angles. - * Only trigger when fractional bits are zero (exact pole). */ + s32 d = normalize_to_r16(deg, radix); + if (d >= FR_D360_R16) { + s32 n = d / FR_D360_R16; + d -= n * FR_D360_R16; + } + if (d >= FR_D180_R16) d -= FR_D360_R16; + s32 offset = 0; + if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = (s32)0x80000000u; } + else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = (s32)0x80000000u; } + return offset + deg_to_bam_full(d); +} + +s32 fr_tan_deg(s32 deg, u16 radix) +{ + if (radix == 0) return FR_TanI(deg); + /* tan(-x) = -tan(x): factor out sign, reduce positive */ + s32 tan_sign = 1; + if (deg < 0) { deg = -deg; tan_sign = -1; } + /* Exact cardinal angles: tan is exactly 0 or ±MAXVAL */ s32 frac_mask = (1 << radix) - 1; if ((deg & frac_mask) == 0) { s32 deg_int = deg >> radix; s32 rem = deg_int % 180; - if (rem == 90 || rem == -90) - return (deg >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + if (rem == 0) return 0; + if (rem == 90) return tan_sign > 0 ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; } - u16 bam = (radix == 0) ? FR_DEG2BAM_I(deg) : (u16)((FR_DEG2BAM(deg) + (1 << (radix - 1))) >> radix); - return fr_tan_bam(bam); + s32 bam_full = range_reduce_deg_bam_full(deg, radix); + u32 q = ((u32)((u16)(bam_full >> 16)) >> 14) & 0x3u; + s32 sign = (q == 1 || q == 3) ? -tan_sign : tan_sign; + s32 mag = tan_mag_from_bam_full(bam_full); + return (sign < 0) ? 
-mag : mag; } /*======================================================= @@ -321,11 +604,9 @@ s32 FR_FixAddSat(s32 x, s32 y) /* FR_acos — returns radians at out_radix. * Range: [0, pi]. Input is a cosine value at the given radix. * - * Uses the same 129-entry cosine table as fr_cos_bam, but in reverse: - * binary-search to find the bracketing pair, then linear-interpolate - * the fractional position between them to recover the full 14-bit - * in-quadrant BAM. This mirrors the forward path and gives matching - * precision (~1 LSB of s15.16 output). + * Uses the 129-entry sine table in reverse: binary-search the ascending + * table to find asin(|input|), then acos = pi/2 - asin (with sign handling + * for the second quadrant). */ s32 FR_acos(s32 input, u16 radix, u16 out_radix) { @@ -335,14 +616,11 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix) s32 idx, d, num, frac; s32 input_abs; - /* Work with absolute value at the caller's radix — we'll need it for - * the sqrt fast path before quantising to r15. */ + /* Work with absolute value at the caller's radix */ sign = (s16)((input < 0) ? 1 : 0); input_abs = sign ? -input : input; - /* Clamp at the caller's radix — not at r15. Near ±1.0 the r15 - * quantisation can round to 32767 even when the caller has sub-LSB - * precision that the sqrt fast path can use. */ + /* Clamp at the caller's radix */ { s32 one = (s32)1 << radix; if (input_abs >= one) @@ -351,16 +629,11 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix) v = FR_CHRDX(input_abs, radix, FR_TRIG_PREC); /* |input| at s0.15 */ - /* Small-angle fast path: when cos(θ) is close to 1.0, the table - * has only 2-8 LSBs of gap per entry, so linear interpolation is - * very coarse. Use the identity acos(x) ≈ sqrt(2*(1-x)). - * - * Key: compute 1-x at the CALLER's radix, not r15. Near ±1.0 the - * r15 quantisation crushes many distinct inputs to the same value - * (cos(179.5°)..cos(179.9°) all round to 32767 at r15). 
The - * caller's higher-radix bits carry the angular information via the - * identity sin(θ) = sqrt(2(1-cos θ)) — effectively the sin trick. */ - if (v > gFR_COS_TAB_Q[7]) + /* Small-angle fast path: when cos(θ) is close to 1.0, the sine table + * has poor resolution near the top (entries close together). + * Use acos(x) ≈ sqrt(2*(1-x)) instead. Threshold: v > sin_tab[121] + * means the input is > cos(7*π/256) ≈ 0.9975. */ + if (v > gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE - 8]) { s32 one = (s32)1 << radix; s32 one_minus_x = one - input_abs; /* 1-|x| at caller radix */ @@ -372,35 +645,27 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix) return rad_out; } - /* Below this point we need the sign-stripped r15 value for the - * binary search. (v was already computed from input_abs above.) */ - - /* Binary search on the cosine quadrant table. The table is - * monotonically decreasing: gFR_COS_TAB_Q[0] = 32767 (cos 0°), - * gFR_COS_TAB_Q[128] = 0 (cos 90°). + /* Binary search on the ascending sine table. + * gFR_SIN_TAB_Q[0] = 0 (sin 0°), gFR_SIN_TAB_Q[128] = 32768 (sin 90°). * - * After the search, lo is the first index where table[lo] <= v, - * so the bracketing pair is (lo-1, lo) with table[lo-1] >= v >= table[lo]. - */ + * Find the first index where table[idx] >= v. */ lo = 0; hi = FR_TRIG_TABLE_SIZE; while (lo < hi) { mid = (lo + hi) >> 1; - if (gFR_COS_TAB_Q[mid] > v) + if ((s32)gFR_SIN_TAB_Q[mid] < v) lo = mid + 1; else hi = mid; } - /* lo is now the index where table[lo] <= v. The bracketing interval - * is [lo-1, lo] (table decreasing). Clamp idx to valid range. - */ + /* lo is now the first index where table[lo] >= v. + * The bracketing interval is [lo-1, lo] with table[lo-1] < v <= table[lo]. + * This gives us the asin angle; acos = pi/2 - asin. */ idx = lo; if (idx <= 0) { - /* v >= table[0] = 32767 — essentially cos(0), already clamped above - * but guard anyway. 
*/ idx = 0; frac = 0; } @@ -411,26 +676,24 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix) } else { - /* Linear interpolate between table[idx-1] and table[idx]. - * d = table[idx-1] - table[idx] (>= 0, cos decreasing) - * num = table[idx-1] - v (how far past table[idx-1]) - * frac = (num << FR_TRIG_FRAC_BITS) / d, in [0, FR_TRIG_FRAC_MAX) - * - * num and d are both in [0, 32767], so num << 7 fits in 22 bits. + /* Interpolate between table[idx-1] and table[idx]. + * d = table[idx] - table[idx-1] (>= 0, sin increasing) + * num = v - table[idx-1] (how far past table[idx-1]) */ - d = gFR_COS_TAB_Q[idx - 1] - gFR_COS_TAB_Q[idx]; - num = gFR_COS_TAB_Q[idx - 1] - v; + d = (s32)gFR_SIN_TAB_Q[idx] - (s32)gFR_SIN_TAB_Q[idx - 1]; + num = v - (s32)gFR_SIN_TAB_Q[idx - 1]; if (d > 0) frac = ((num << FR_TRIG_FRAC_BITS) + (d >> 1)) / d; else frac = 0; - /* Reconstruct: the angle is at index (idx-1) + frac/FRAC_MAX, - * so shift idx back by 1 for the BAM calculation below. */ idx = idx - 1; } { - u16 bam = (u16)(((u32)idx << FR_TRIG_FRAC_BITS) + (u32)frac); + /* asin_bam is the angle in first-quadrant BAM whose sin = v */ + u16 asin_bam = (u16)(((u32)idx << FR_TRIG_FRAC_BITS) + (u32)frac); + /* acos = pi/2 - asin (in BAM: quadrant - asin_bam) */ + u16 bam = (u16)(FR_TRIG_QUADRANT - asin_bam); if (sign) bam = (u16)(FR_BAM_HALF - bam); /* mirror: pi - angle */ return FR_CHRDX(FR_Q2RAD(bam), 14, out_radix); diff --git a/src/FR_math.h b/src/FR_math.h index a251316..ca4b096 100644 --- a/src/FR_math.h +++ b/src/FR_math.h @@ -258,7 +258,7 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { /*================================================ * Constants used in Trig tables, definitions * - * FR_TRIG_PREC — internal table precision (s0.15, kept for table indexing) + * FR_TRIG_PREC — internal table precision (u0.15, sine table) * FR_TRIG_OUT_PREC — output precision of sin/cos/tan (s15.16 since v2.0.1) * FR_TRIG_ONE — exact 1.0 in output format (1 << 16 = 65536) * @@ -328,8 +328,10 @@ 
static inline s32 FR_div_rnd(s64 num, s32 den) { #define FR_RAD2DEG(x) (((x) << 6) - ((x) << 3) + (x) + ((x) >> 2) + (((x) >> 4) - ((x) >> 6)) - ((x) >> 10)) /* FR_DEG2BAM(x): multiply by 65536/360 ≈ 182.0449 (7 terms, ~18 bits). - * CAUTION: overflows s32 when |x| > ~256 deg at s15.16 (x<<7 term). - * For safe conversion at any radix, use fr_deg_to_bam() instead. */ + * Intermediate terms overflow s32 when |x| > ~256 deg at s15.16 (x<<7 term), + * but the overflow is harmless when the result is truncated to u16 BAM + * (two's complement wrapping preserves modular correctness). + * For full-precision s32 BAM (sub-BAM interpolation), use fr_deg_to_bam(). */ #define FR_DEG2BAM(x) (((x)<<7)+((x)<<6)-((x)<<3)-((x)<<1)+((x)>>5)+((x)>>6)-((x)>>9)) /* FR_BAM2DEG(x): multiply by 360/65536 = 0.00549316 (4 terms, exact) */ @@ -337,9 +339,9 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { /* FR_RAD2BAM(x): multiply by 65536/(2*pi) ≈ 10430.378 (7 terms, ~21 bits). * CAUTION: overflows s32 when |x| > ~4 rad at s15.16 (x<<13 term). - * For safe conversion at any radix, use fr_rad_to_bam() instead. */ -#define FR_RAD2BAM(x) (((x)<<13)+((x)<<11)+((x)<<7)+((x)<<6)-((x)<<1)+((x)>>1)-((x)>>3)) - + * For safe conversion at any radix, use fr_rad_to_bam() instead. + * #define FR_RAD2BAM(x) (((x)<<13)+((x)<<11)+((x)<<7)+((x)<<6)-((x)<<1)+((x)>>1)-((x)>>3)) */ +#define FR_RAD2BAM(x) (((x)<<13)+((x)<<11)+((x)<<7)+((x)<<6)-((x)<<1)+((x)>>1)-((x)>>3)+((x)>>8)-((x)>>11)-((x)>>14)) /* ── Overflow-safe rad/deg to BAM conversion functions ───────────── * * These replace the FR_RAD2BAM / FR_DEG2BAM macros for callers that @@ -353,52 +355,25 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { * fr_deg_to_bam: reduce to [-90, 90) + quadrant offset. ±360° safe. 
*/ -/* Constants at radix 16 */ -#define FR_PI_R16 205887 /* round(pi * 65536) */ -#define FR_TWO_PI_R16 411775 /* round(2*pi * 65536) */ -#define FR_D90_R16 5898240 /* 90 * 65536 */ -#define FR_D180_R16 11796480 /* 180 * 65536 */ -#define FR_D360_R16 23592960 /* 360 * 65536 */ +/* Pi constants at any radix: FR_PI(r) = round(pi * 2^r), etc. + * Compiler evaluates at compile time when r is a constant. + * Max safe radix: FR_PI r<=29, FR_TWO_PI r<=28, FR_HALF_PI r<=30. */ +#define FR_PI(r) ((s32)(3.14159265358979323846 * (1LL << (r)) + 0.5)) +#define FR_TWO_PI(r) ((s32)(6.28318530717958647692 * (1LL << (r)) + 0.5)) +#define FR_HALF_PI(r) ((s32)(1.57079632679489661923 * (1LL << (r)) + 0.5)) +#define FR_THREE_HALF_PI(r) ((s32)(4.71238898038468985769 * (1LL << (r)) + 0.5)) -static u16 __attribute__((unused)) fr_rad_to_bam(s32 rad, u16 radix) -{ - /* Normalize to radix 16 */ - s32 r = (radix > 16) ? (rad >> (radix - 16)) - : (radix < 16) ? (rad << (16 - radix)) - : rad; - - /* Reduce to [-pi, pi] — one conditional pass, covers ±2*pi input */ - if (r > FR_PI_R16) r -= FR_TWO_PI_R16; - if (r < -FR_PI_R16) r += FR_TWO_PI_R16; - - /* Shift terms reordered: interleave negatives early to keep all - * intermediate sums within s32. Same 7-term decomposition as - * FR_RAD2BAM, just reordered. Safe for |r| <= 205887 (pi). */ - s32 bam = (r<<13)-(r<<1)+(r<<11)-(r>>3)+(r<<7)+(r<<6)+(r>>1); - return (u16)((bam + (1 << 15)) >> 16); -} +/* Convenience aliases at radix 16 */ +#define FR_PI_R16 FR_PI(16) +#define FR_TWO_PI_R16 FR_TWO_PI(16) -static u16 __attribute__((unused)) fr_deg_to_bam(s32 deg, u16 radix) -{ - /* Normalize to radix 16 */ - s32 d = (radix > 16) ? (deg >> (radix - 16)) - : (radix < 16) ? (deg << (16 - radix)) - : deg; - - /* Reduce to [-180, 180) — covers ±360 input */ - if (d >= FR_D180_R16) d -= FR_D360_R16; - if (d < -FR_D180_R16) d += FR_D360_R16; - - /* Reduce to [-90, 90) with BAM quadrant offset. - * Needed because 182 * 11796480 (±180° at r16) overflows s32. 
*/ - u16 offset = 0; - if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = 32768; } - else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = 32768; } - - /* |d| < 90° at r16. Max intermediate = 5898240 * 192 = 1.13B, safe. */ - s32 bam = (d<<7)+(d<<6)-(d<<3)-(d<<1)+(d>>5)+(d>>6)-(d>>9); - return (u16)(offset + (u16)((bam + (1 << 15)) >> 16)); -} +/* Degree constants at radix 16 (exact — no truncation) */ +#define FR_D90_R16 ((s32)90 << 16) +#define FR_D180_R16 ((s32)180 << 16) +#define FR_D360_R16 ((s32)360 << 16) + + u16 fr_rad_to_bam(s32 rad, u16 radix); + u16 fr_deg_to_bam(s32 deg, u16 radix); /* FR_BAM2RAD(x): multiply by 2*pi/65536 ≈ 0.0000959 (5 terms, ~18 bits) */ #define FR_BAM2RAD(x) (((x)>>13)-((x)>>15)+((x)>>18)+((x)>>21)+((x)>>25)) @@ -441,13 +416,25 @@ static u16 __attribute__((unused)) fr_deg_to_bam(s32 deg, u16 radix) * fr_cos(rad, radix) — cos of radians at radix, s15.16 result * fr_sin(rad, radix) — sin of radians at radix, s15.16 result * fr_tan(rad, radix) — tan of radians at radix, s15.16 result - * fr_cos_deg(deg) — cos of integer degrees, s15.16 result - * fr_sin_deg(deg) — sin of integer degrees, s15.16 result + * fr_cos_deg(deg, radix) — cos of fixed-radix degrees, s15.16 result + * fr_sin_deg(deg, radix) — sin of fixed-radix degrees, s15.16 result + * fr_tan_deg(deg, radix) — tan of fixed-radix degrees, s15.16 result * * All go through the same 129-entry quadrant table with linear interpolation. * Worst-case error: ~2 LSB in s15.16 (~3e-5 absolute), except at the four * cardinal angles where the result is exact. + * + * FR_USE_EXTENDED_TRIG_PREC (default: ON) enables sub-BAM interpolation + * in fr_sin/fr_cos/fr_tan (the radian/degree-input functions). This adds + * one extra multiply per call but recovers ~16 bits of sub-BAM precision. 
+ * To disable (faster, no multiply in the trig hot path): + * + * #define FR_USE_EXTENDED_TRIG_PREC 0 + * #include "FR_math.h" */ +#ifndef FR_USE_EXTENDED_TRIG_PREC +#define FR_USE_EXTENDED_TRIG_PREC 1 +#endif s32 fr_cos_bam(u16 bam); s32 fr_sin_bam(u16 bam); s32 fr_tan_bam(u16 bam); @@ -458,26 +445,32 @@ static u16 __attribute__((unused)) fr_deg_to_bam(s32 deg, u16 radix) /* Integer degrees -> BAM using division (exact at all multiples of 45 deg). */ #define FR_DEG2BAM_I(deg) ((u16)((((s32)(deg) << 16) + ((deg) >= 0 ? 180 : -180)) / 360)) -#define fr_cos_deg(deg) fr_cos_bam(FR_DEG2BAM_I(deg)) -#define fr_sin_deg(deg) fr_sin_bam(FR_DEG2BAM_I(deg)) +/* Legacy single-arg integer-degree macros — use FR_CosI / FR_SinI instead */ +/* #define fr_cos_deg(deg) fr_cos_bam(FR_DEG2BAM_I(deg)) — removed, name reused for 2-arg function */ +/* #define fr_sin_deg(deg) fr_sin_bam(FR_DEG2BAM_I(deg)) — removed, name reused for 2-arg function */ /*=============================================== - * Integer-degree trig API (thin wrappers over the BAM-native path) - * - * FR_CosI(deg) — cos of integer degrees, s15.16 result - * FR_SinI(deg) — sin of integer degrees, s15.16 result - * FR_TanI(deg) — tan of integer degrees, s15.16 result - * FR_Cos(deg, radix) — cos of fixed-radix degrees, s15.16 result - * FR_Sin(deg, radix) — sin of fixed-radix degrees, s15.16 result - * FR_Tan(deg, radix) — tan of fixed-radix degrees, s15.16 result + * Degree-input trig API + * + * FR_CosI(deg) — cos of integer degrees, s15.16 result + * FR_SinI(deg) — sin of integer degrees, s15.16 result + * FR_TanI(deg) — tan of integer degrees, s15.16 result + * fr_cos_deg(deg, radix) — cos of fixed-radix degrees, s15.16 result + * fr_sin_deg(deg, radix) — sin of fixed-radix degrees, s15.16 result + * fr_tan_deg(deg, radix) — tan of fixed-radix degrees, s15.16 result */ #define FR_CosI(deg) fr_cos_bam(FR_DEG2BAM_I(deg)) #define FR_SinI(deg) fr_sin_bam(FR_DEG2BAM_I(deg)) - s32 FR_Cos(s32 deg, u16 radix); - s32 
FR_Sin(s32 deg, u16 radix); + s32 fr_cos_deg(s32 deg, u16 radix); + s32 fr_sin_deg(s32 deg, u16 radix); s32 FR_TanI(s32 deg); - s32 FR_Tan(s32 deg, u16 radix); + s32 fr_tan_deg(s32 deg, u16 radix); + + /* Legacy macros — use fr_sin_deg/fr_cos_deg/fr_tan_deg in new code */ + #define FR_Sin fr_sin_deg + #define FR_Cos fr_cos_deg + #define FR_Tan fr_tan_deg /* Inverse trig — output in radians at caller-specified radix (s32). * FR_atan2 returns radians at radix 16 (s15.16). diff --git a/src/FR_trig_table.h b/src/FR_trig_table.h index 983c4e2..f57edd6 100644 --- a/src/FR_trig_table.h +++ b/src/FR_trig_table.h @@ -1,14 +1,14 @@ /** - * @file FR_trig_table.h - 129-entry quadrant cosine table for FR_Math 2.0 + * @file FR_trig_table.h - 129-entry quadrant sine table for FR_Math 2.0 * * This table covers one quadrant [0, pi/2] inclusive in 128 intervals (so * 129 entries). Indexed by a 7-bit BAM (binary angular measure) sub-index. - * Used by fr_cos_bam / fr_sin_bam in FR_math.c. + * Used by fr_sin_bam / fr_cos_bam in FR_math.c. * - * Output format: s0.15 (signed, 15 fractional bits). So - * gFR_COS_TAB_Q[0] = round(cos(0) * 32767) = 32767 - * gFR_COS_TAB_Q[64] = round(cos(pi/4) * 32767) ~ 23170 - * gFR_COS_TAB_Q[128] = round(cos(pi/2) * 32767) = 0 + * Output format: u0.15 (unsigned, 15 fractional bits). So + * gFR_SIN_TAB_Q[0] = round(sin(0) * 32768) = 0 + * gFR_SIN_TAB_Q[64] = round(sin(pi/4) * 32768) = 23170 + * gFR_SIN_TAB_Q[128] = round(sin(pi/2) * 32768) = 32768 * * Generated by tools/coef-gen.py — do not hand-edit. * @@ -27,7 +27,7 @@ extern "C" { #define FR_TRIG_TABLE_BITS (7) /* log2(intervals) */ #define FR_TRIG_TABLE_SIZE ((1 << FR_TRIG_TABLE_BITS) + 1) /* entries = intervals + 1 */ -/* Derived constants for fr_cos_bam / fr_sin_bam. +/* Derived constants for fr_sin_bam / fr_cos_bam. * * The BAM has 16 bits total: 2 top bits for quadrant, 14 bits in-quadrant. 
* The in-quadrant value is split into (FR_TRIG_TABLE_BITS) table-index bits @@ -44,24 +44,24 @@ extern "C" { #define FR_TRIG_FRAC_HALF (FR_TRIG_FRAC_MAX >> 1) /* rounding bias */ #define FR_TRIG_QUADRANT (1 << 14) /* in-quadrant span */ -static const short gFR_COS_TAB_Q[FR_TRIG_TABLE_SIZE] = { - 32767, 32765, 32757, 32745, 32728, 32705, 32678, 32646, - 32609, 32567, 32521, 32469, 32412, 32351, 32285, 32213, - 32137, 32057, 31971, 31880, 31785, 31685, 31580, 31470, - 31356, 31237, 31113, 30985, 30852, 30714, 30571, 30424, - 30273, 30117, 29956, 29791, 29621, 29447, 29268, 29085, - 28898, 28706, 28510, 28310, 28105, 27896, 27683, 27466, - 27245, 27019, 26790, 26556, 26319, 26077, 25832, 25582, - 25329, 25072, 24811, 24547, 24279, 24007, 23731, 23452, - 23170, 22884, 22594, 22301, 22005, 21705, 21403, 21096, - 20787, 20475, 20159, 19841, 19519, 19195, 18868, 18537, - 18204, 17869, 17530, 17189, 16846, 16499, 16151, 15800, - 15446, 15090, 14732, 14372, 14010, 13645, 13279, 12910, - 12539, 12167, 11793, 11417, 11039, 10659, 10278, 9896, - 9512, 9126, 8739, 8351, 7962, 7571, 7179, 6786, - 6393, 5998, 5602, 5205, 4808, 4410, 4011, 3612, - 3212, 2811, 2410, 2009, 1608, 1206, 804, 401, - 0 +static const unsigned short gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE] = { + 0, 402, 804, 1206, 1608, 2009, 2411, 2811, + 3212, 3612, 4011, 4410, 4808, 5205, 5602, 5998, + 6393, 6787, 7180, 7571, 7962, 8351, 8740, 9127, + 9512, 9896, 10279, 10660, 11039, 11417, 11793, 12167, + 12540, 12910, 13279, 13646, 14010, 14373, 14733, 15091, + 15447, 15800, 16151, 16500, 16846, 17190, 17531, 17869, + 18205, 18538, 18868, 19195, 19520, 19841, 20160, 20475, + 20788, 21097, 21403, 21706, 22006, 22302, 22595, 22884, + 23170, 23453, 23732, 24008, 24279, 24548, 24812, 25073, + 25330, 25583, 25833, 26078, 26320, 26557, 26791, 27020, + 27246, 27467, 27684, 27897, 28106, 28311, 28511, 28707, + 28899, 29086, 29269, 29448, 29622, 29792, 29957, 30118, + 30274, 30425, 30572, 30715, 30853, 30986, 31114, 31238, + 31357, 
31471, 31581, 31686, 31786, 31881, 31972, 32058, + 32138, 32214, 32286, 32352, 32413, 32470, 32522, 32568, + 32610, 32647, 32679, 32706, 32729, 32746, 32758, 32766, + 32768 }; /* ---- Tangent table: 65 entries covering one octant [0, pi/4] ---- diff --git a/tests/test_tdd.cpp b/tests/test_tdd.cpp index 3daaff9..f1d7c5d 100644 --- a/tests/test_tdd.cpp +++ b/tests/test_tdd.cpp @@ -58,7 +58,7 @@ * ============================================================ */ static inline double frd(s32 x, int radix) { - return (double)x / (double)(1L << radix); + return (double)x / ldexp(1.0, radix); } typedef struct { @@ -73,13 +73,19 @@ typedef struct { double worst_pct_input; /* input that produced max pct error */ double worst_pct_actual; double worst_pct_expected; + /* Clamped-denominator relative error: denom = max(|expected|, 1% of full_scale) */ + double max_pct_err_clamped; + double sum_pct_err_clamped; + double worst_clamped_input; + double worst_clamped_actual; + double worst_clamped_expected; } stats_t; static void stats_reset(stats_t *s) { memset(s, 0, sizeof(*s)); } -static void stats_add(stats_t *s, double in, double actual, double expected) { +static void stats_add(stats_t *s, double in, double actual, double expected, double full_scale) { double e = actual - expected; if (e < 0) e = -e; if (s->n == 0 || e > s->max_abs_err) { @@ -97,6 +103,17 @@ static void stats_add(stats_t *s, double in, double actual, double expected) { s->worst_pct_expected = expected; } s->sum_pct_err += pct; + /* Clamped-denominator relative error: floor = 1% of full_scale */ + double floor_val = 0.01 * full_scale; + double denom = fabs(expected) > floor_val ? fabs(expected) : floor_val; + double pct_clamped = (denom > 0.0) ? 
(e / denom) * 100.0 : 0.0; + if (pct_clamped > s->max_pct_err_clamped) { + s->max_pct_err_clamped = pct_clamped; + s->worst_clamped_input = in; + s->worst_clamped_actual = actual; + s->worst_clamped_expected = expected; + } + s->sum_pct_err_clamped += pct_clamped; s->n++; } @@ -104,8 +121,8 @@ static double stats_mean(const stats_t *s) { return s->n ? s->sum_abs_err / s->n : 0.0; } -static double stats_mean_pct(const stats_t *s) { - return s->n ? s->sum_pct_err / s->n : 0.0; +static double stats_mean_pct_clamped(const stats_t *s) { + return s->n ? s->sum_pct_err_clamped / s->n : 0.0; } /* Quantize a double to s15.16 resolution (same grid as library output). */ @@ -113,8 +130,13 @@ static inline double q16(double x) { return floor(x * 65536.0 + 0.5) / 65536.0; } +/* Round-to-nearest float→fixed conversion (not truncation). */ +static inline s32 tofix(double v, int p) { + return (s32)floor(ldexp(v, p) + 0.5); +} + /* Reference value for tan: libm tan() clamped to ±maxint as s15.16 double. 
*/ -static const double TAN_CLAMP = (double)0x7fffffff / (double)(1L << 16); +static const double TAN_CLAMP = (double)0x7fffffff / 65536.0; static double tan_ref(double rad) { double t = tan(rad); @@ -129,9 +151,9 @@ static int g_showpeak = 0; /* Print one accuracy table row, optionally with peak-error input */ static void acc_row(const char *name, const stats_t *s, const char *note) { printf("| %s | %.4f | %.4f | %s", - name, s->max_pct_err, stats_mean_pct(s), note); + name, s->max_pct_err_clamped, stats_mean_pct_clamped(s), note); if (g_showpeak) - printf(" | %.4g", s->worst_pct_input); + printf(" | %.4g", s->worst_clamped_input); printf(" |\n"); } @@ -647,8 +669,8 @@ static void section_arithmetic(void) { }; for (int i = 0; i < (int)(sizeof(div_cases)/sizeof(div_cases[0])); i++) { int r = div_cases[i].r; - s32 xfp = (s32)(div_cases[i].xd * (1L << r)); - s32 yfp = (s32)(div_cases[i].yd * (1L << r)); + s32 xfp = tofix(div_cases[i].xd, r); + s32 yfp = tofix(div_cases[i].yd, r); double expected = div_cases[i].xd / div_cases[i].yd; s32 d64 = FR_DIV(xfp, r, yfp, r); s32 d32 = FR_DIV32(xfp, r, yfp, r); @@ -681,8 +703,8 @@ static void section_trig_int(void) { double exp_sin = sin(deg * M_PI / 180.0); double act_cos = frd(FR_CosI((s16)deg), FR_TRIG_OUT_PREC); double act_sin = frd(FR_SinI((s16)deg), FR_TRIG_OUT_PREC); - stats_add(&cos_stats, deg, act_cos, exp_cos); - stats_add(&sin_stats, deg, act_sin, exp_sin); + stats_add(&cos_stats, deg, act_cos, exp_cos, 1.0); + stats_add(&sin_stats, deg, act_sin, exp_sin, 1.0); } table_header_stats(); @@ -698,7 +720,7 @@ static void section_trig_int(void) { if (deg % 90 == 0 && deg != 0) { tan_skipped++; continue; } double exp_tan = tan(deg * M_PI / 180.0); double act_tan = frd(FR_TanI((s16)deg), FR_TRIG_OUT_PREC); - stats_add(&tan_stats, deg, act_tan, exp_tan); + stats_add(&tan_stats, deg, act_tan, exp_tan, TAN_CLAMP); } table_header_stats(); table_row_stats("FR_TanI [-89..89]", &tan_stats); @@ -736,8 +758,8 @@ static void 
section_trig_frac(void) { double exp_s = sin(deg_d * M_PI / 180.0); double act_c = frd(FR_Cos(deg_fr, 8), FR_TRIG_OUT_PREC); double act_s = frd(FR_Sin(deg_fr, 8), FR_TRIG_OUT_PREC); - stats_add(&cos_f, deg_d, act_c, exp_c); - stats_add(&sin_f, deg_d, act_s, exp_s); + stats_add(&cos_f, deg_d, act_c, exp_c, 1.0); + stats_add(&sin_f, deg_d, act_s, exp_s, 1.0); } table_header_stats(); table_row_stats("FR_Cos r8 0.25 step", &cos_f); @@ -773,10 +795,11 @@ static void section_inverse_trig(void) { /* radix 15 inputs, output radians at radix 16, 200 samples */ for (int i = -200; i <= 200; i++) { double xd = i / 200.0; - s32 fr = (s32)(xd * (1 << 15)); + s32 fr = tofix(xd, 15); + double actual_xd = frd(fr, 15); s32 rad = FR_acos(fr, 15, 16); - double ref_rad = acos(xd); - stats_add(&acos_stats, xd, frd(rad, 16), ref_rad); + double ref_rad = acos(actual_xd); + stats_add(&acos_stats, actual_xd, frd(rad, 16), ref_rad, M_PI); } table_header_stats(); table_row_stats("FR_acos vs acos() (rad)", &acos_stats); @@ -787,10 +810,11 @@ static void section_inverse_trig(void) { stats_reset(&asin_stats); for (int i = -200; i <= 200; i++) { double xd = i / 200.0; - s32 fr = (s32)(xd * (1 << 15)); + s32 fr = tofix(xd, 15); + double actual_xd = frd(fr, 15); s32 rad = FR_asin(fr, 15, 16); - double ref_rad = asin(xd); - stats_add(&asin_stats, xd, frd(rad, 16), ref_rad); + double ref_rad = asin(actual_xd); + stats_add(&asin_stats, actual_xd, frd(rad, 16), ref_rad, M_PI); } table_header_stats(); table_row_stats("FR_asin vs asin() (rad)", &asin_stats); @@ -826,13 +850,13 @@ static void section_pow_log(void) { stats_t pow2_stats; stats_reset(&pow2_stats); for (int i = 0; i < (int)(sizeof(pow2_inputs)/sizeof(pow2_inputs[0])); i++) { double x = pow2_inputs[i]; - s32 fr = (s32)(x * (1L << 16)); + s32 fr = tofix(x, 16); s32 r = FR_pow2(fr, 16); double rd = frd(r, 16); double ref = pow(2.0, x); double err = rd - ref; if (err < 0) err = -err; double rel = ref != 0.0 ? 
err / fabs(ref) : err; - stats_add(&pow2_stats, x, rd, ref); + stats_add(&pow2_stats, x, rd, ref, pow(2.0, 8.0)); printf("| %.4g | %ld | %.6g | %.6g | %.4g | %.4g |\n", x, (long)r, rd, ref, err, rel); } @@ -845,11 +869,12 @@ static void section_pow_log(void) { stats_t pow2_fine; stats_reset(&pow2_fine); for (int i = -800; i <= 800; i++) { double x = i / 100.0; - s32 fr = (s32)(x * (1L << 16)); + s32 fr = tofix(x, 16); + double actual_x = frd(fr, 16); s32 r = FR_pow2(fr, 16); double rd = frd(r, 16); - double ref = pow(2.0, x); - stats_add(&pow2_fine, x, rd, ref); + double ref = pow(2.0, actual_x); + stats_add(&pow2_fine, actual_x, rd, ref, pow(2.0, 8.0)); } table_header_stats(); table_row_stats("FR_pow2 [-8,8] step 0.01", &pow2_fine); @@ -884,7 +909,7 @@ static void section_pow_log(void) { printf("| %ld | %u | %u | %ld | %.6g | %.6g |\n", (long)log2_cases[i].in, log2_cases[i].r, log2_cases[i].or_, (long)r, rd, log2_cases[i].ref); - stats_add(&log2_stats, (double)log2_cases[i].in, rd, log2_cases[i].ref); + stats_add(&log2_stats, (double)log2_cases[i].in, rd, log2_cases[i].ref, log2(32000.0)); } printf("\n"); table_header_stats(); @@ -897,11 +922,11 @@ static void section_pow_log(void) { double ln_inputs[] = {1, 2, M_E, 4, 8, 10, 100, 1000}; stats_t ln_stats; stats_reset(&ln_stats); for (int i = 0; i < (int)(sizeof(ln_inputs)/sizeof(ln_inputs[0])); i++) { - s32 fr = (s32)(ln_inputs[i] * (1L << 16)); + s32 fr = tofix(ln_inputs[i], 16); s32 r = FR_ln(fr, 16, 16); double rd = frd(r, 16); double ref = log(ln_inputs[i]); - stats_add(&ln_stats, ln_inputs[i], rd, ref); + stats_add(&ln_stats, ln_inputs[i], rd, ref, log(32000.0)); printf("| %.4g | %ld | %.6g | %.6g |\n", ln_inputs[i], (long)r, rd, ref); } printf("\n"); @@ -914,11 +939,11 @@ static void section_pow_log(void) { double log10_inputs[] = {1, 2, 5, 10, 100, 1000, 10000}; stats_t log10_stats; stats_reset(&log10_stats); for (int i = 0; i < (int)(sizeof(log10_inputs)/sizeof(log10_inputs[0])); i++) { - s32 fr = 
(s32)(log10_inputs[i] * (1L << 16)); + s32 fr = tofix(log10_inputs[i], 16); s32 r = FR_log10(fr, 16, 16); double rd = frd(r, 16); double ref = log10(log10_inputs[i]); - stats_add(&log10_stats, log10_inputs[i], rd, ref); + stats_add(&log10_stats, log10_inputs[i], rd, ref, log10(32000.0)); printf("| %.4g | %ld | %.6g | %.6g |\n", log10_inputs[i], (long)r, rd, ref); } printf("\n"); @@ -929,14 +954,14 @@ static void section_pow_log(void) { md_h3("8.6 FR_EXP and FR_POW10 macros (wrap FR_pow2)"); printf("| Expression | Result | as double | Reference | Note |\n|---|---:|---:|---:|---|\n"); { - s32 in = (s32)(1.0 * (1L << 16)); + s32 in = tofix(1.0, 16); s32 r = FR_EXP(in, 16); double rd = frd(r, 16); printf("| FR_EXP(1.0,16) | %ld | %.6g | %.6g | exp(1) = e |\n", (long)r, rd, M_E); } { - s32 in = (s32)(2.0 * (1L << 16)); + s32 in = tofix(2.0, 16); s32 r = FR_POW10(in, 16); double rd = frd(r, 16); printf("| FR_POW10(2.0,16) | %ld | %.6g | %.6g | 10^2 = 100 |\n", @@ -1265,14 +1290,15 @@ static void section_v2_new(void) { stats_t sqrt_stats; stats_reset(&sqrt_stats); for (int i = 0; i < (int)(sizeof(sqrt_inputs)/sizeof(sqrt_inputs[0])); i++) { double x = sqrt_inputs[i]; - s32 fr = (s32)(x * (1L << 16)); + s32 fr = tofix(x, 16); + double actual_x = frd(fr, 16); s32 r = FR_sqrt(fr, 16); double rd = frd(r, 16); - double ref = sqrt(x); + double ref = sqrt(actual_x); double err = rd - ref; if (err < 0) err = -err; - stats_add(&sqrt_stats, x, rd, ref); + stats_add(&sqrt_stats, actual_x, rd, ref, sqrt(32000.0)); printf("| %.6g | %ld | %.6g | %.6g | %.4g |\n", - x, (long)r, rd, ref, err); + actual_x, (long)r, rd, ref, err); } printf("\n"); table_header_stats(); @@ -1283,11 +1309,12 @@ static void section_v2_new(void) { stats_t sqrt_fine; stats_reset(&sqrt_fine); for (int i = 1; i <= 1000; i++) { double x = i * 10.0; /* 10..10000 */ - s32 fr = (s32)(x * (1L << 16)); + s32 fr = tofix(x, 16); + double actual_x = frd(fr, 16); s32 r = FR_sqrt(fr, 16); double rd = frd(r, 16); - double ref 
= sqrt(x); - stats_add(&sqrt_fine, x, rd, ref); + double ref = sqrt(actual_x); + stats_add(&sqrt_fine, actual_x, rd, ref, sqrt(32000.0)); } table_header_stats(); table_row_stats("FR_sqrt [10,10000]", &sqrt_fine); @@ -1313,16 +1340,16 @@ static void section_v2_new(void) { }; stats_t hyp_stats; stats_reset(&hyp_stats); for (int i = 0; i < (int)(sizeof(hyp_cases)/sizeof(hyp_cases[0])); i++) { - s32 fx = (s32)(hyp_cases[i].x * (1L << 16)); - s32 fy = (s32)(hyp_cases[i].y * (1L << 16)); + s32 fx = tofix(hyp_cases[i].x, 16); + s32 fy = tofix(hyp_cases[i].y, 16); + double actual_x = frd(fx, 16), actual_y = frd(fy, 16); s32 r = FR_hypot(fx, fy, 16); double rd = frd(r, 16); - double ref = hypot(hyp_cases[i].x, hyp_cases[i].y); + double ref = hypot(actual_x, actual_y); double err = rd - ref; if (err < 0) err = -err; - stats_add(&hyp_stats, sqrt(hyp_cases[i].x*hyp_cases[i].x + hyp_cases[i].y*hyp_cases[i].y), - rd, ref); + stats_add(&hyp_stats, ref, rd, ref, hypot(1000.0, 1000.0)); printf("| %g | %g | %ld | %.6g | %.6g | %.4g |\n", - hyp_cases[i].x, hyp_cases[i].y, (long)r, rd, ref, err); + actual_x, actual_y, (long)r, rd, ref, err); } printf("\n"); table_header_stats(); @@ -1334,17 +1361,17 @@ static void section_v2_new(void) { printf("|---:|---:|---:|---:|---:|---:|---:|\n"); stats_t hf8_stats; stats_reset(&hf8_stats); for (int i = 0; i < (int)(sizeof(hyp_cases)/sizeof(hyp_cases[0])); i++) { - s32 fx = (s32)(hyp_cases[i].x * (1L << 16)); - s32 fy = (s32)(hyp_cases[i].y * (1L << 16)); + s32 fx = tofix(hyp_cases[i].x, 16); + s32 fy = tofix(hyp_cases[i].y, 16); + double actual_x = frd(fx, 16), actual_y = frd(fy, 16); s32 r = FR_hypot_fast8(fx, fy); double rd = frd(r, 16); - double ref = hypot(hyp_cases[i].x, hyp_cases[i].y); + double ref = hypot(actual_x, actual_y); double err = rd - ref; if (err < 0) err = -err; double rel = (ref > 0) ? 
err / ref * 100.0 : 0.0; - stats_add(&hf8_stats, sqrt(hyp_cases[i].x*hyp_cases[i].x + hyp_cases[i].y*hyp_cases[i].y), - rd, ref); + stats_add(&hf8_stats, ref, rd, ref, hypot(1000.0, 1000.0)); printf("| %g | %g | %ld | %.6g | %.6g | %.4g | %.4g |\n", - hyp_cases[i].x, hyp_cases[i].y, (long)r, rd, ref, err, rel); + actual_x, actual_y, (long)r, rd, ref, err, rel); } printf("\n"); table_header_stats(); @@ -1400,7 +1427,7 @@ static void section_v2_new(void) { else if (t < 0.50) ideal = 2.0 - 4.0 * t; /* 1 → 0 */ else if (t < 0.75) ideal = -4.0 * (t - 0.5); /* 0 → -1 */ else ideal = -1.0 + 4.0 * (t - 0.75); /* -1 → 0 */ - stats_add(&tri_stats, t * 360.0, (double)actual / 32767.0, ideal); + stats_add(&tri_stats, t * 360.0, (double)actual / 32767.0, ideal, 1.0); } table_header_stats(); table_row_stats("fr_wave_tri vs ideal", &tri_stats); @@ -1486,8 +1513,8 @@ static void section_multiradix(void) { int log2_radixes[] = {8, 12, 16, 24}; for (int ri = 0; ri < 4; ri++) { int R = log2_radixes[ri]; - double scale = (double)(1L << R); - double max_val = (double)((1L << (30 - R))); /* stay well within s32 */ + double scale = ldexp(1.0, R); + double max_val = ldexp(1.0, 30 - R); /* stay well within s32 */ stats_t st; stats_reset(&st); /* Sweep from 0.125 to max representable value */ @@ -1498,24 +1525,26 @@ static void section_multiradix(void) { for (int i = 0; i < ninp; i++) { if (inputs[i] > max_val) continue; /* would overflow s32 */ - s32 fr = (s32)(inputs[i] * scale); + s32 fr = tofix(inputs[i], R); if (fr <= 0) continue; + double actual_x = frd(fr, R); s32 r = FR_log2(fr, (u16)R, (u16)R); double rd = frd(r, R); - double ref = log2(inputs[i]); - stats_add(&st, inputs[i], rd, ref); + double ref = log2(actual_x); + stats_add(&st, actual_x, rd, ref, log2(32000.0)); } /* Fine-grained sweep in [1, min(100, max_val)] */ double sweep_max = max_val < 100.0 ? 
max_val : 100.0; for (int i = 1; i <= 500; i++) { double x = 1.0 + ((sweep_max - 1.0) * i / 500.0); - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); if (fr <= 0) continue; + double actual_x = frd(fr, R); s32 r = FR_log2(fr, (u16)R, (u16)R); double rd = frd(r, R); - double ref = log2(x); - stats_add(&st, x, rd, ref); + double ref = log2(actual_x); + stats_add(&st, actual_x, rd, ref, log2(32000.0)); } double lsb = 1.0 / scale; @@ -1535,19 +1564,20 @@ static void section_multiradix(void) { for (int ri = 0; ri < 4; ri++) { int R = log2_radixes[ri]; - double scale = (double)(1L << R); - double max_val = (double)((1L << (30 - R))); + double scale = ldexp(1.0, R); + double max_val = ldexp(1.0, 30 - R); double sweep_max = max_val < 100.0 ? max_val : 100.0; stats_t st; stats_reset(&st); for (int i = 1; i <= 500; i++) { double x = 0.5 + ((sweep_max - 0.5) * i / 500.0); - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); if (fr <= 0) continue; + double actual_x = frd(fr, R); s32 r = FR_ln(fr, (u16)R, (u16)R); double rd = frd(r, R); - double ref = log(x); - stats_add(&st, x, rd, ref); + double ref = log(actual_x); + stats_add(&st, actual_x, rd, ref, log(32000.0)); } double lsb = 1.0 / scale; @@ -1567,19 +1597,20 @@ static void section_multiradix(void) { for (int ri = 0; ri < 4; ri++) { int R = log2_radixes[ri]; - double scale = (double)(1L << R); - double max_val = (double)((1L << (30 - R))); + double scale = ldexp(1.0, R); + double max_val = ldexp(1.0, 30 - R); double sweep_max = max_val < 1000.0 ? 
max_val : 1000.0; stats_t st; stats_reset(&st); for (int i = 1; i <= 500; i++) { double x = 0.5 + ((sweep_max - 0.5) * i / 500.0); - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); if (fr <= 0) continue; + double actual_x = frd(fr, R); s32 r = FR_log10(fr, (u16)R, (u16)R); double rd = frd(r, R); - double ref = log10(x); - stats_add(&st, x, rd, ref); + double ref = log10(actual_x); + stats_add(&st, actual_x, rd, ref, log10(32000.0)); } double lsb = 1.0 / scale; @@ -1600,8 +1631,8 @@ static void section_multiradix(void) { int div_radixes[] = {8, 12, 16, 20}; for (int ri = 0; ri < 4; ri++) { int R = div_radixes[ri]; - double scale = (double)(1L << R); - double max_val = (double)(1L << (30 - R)); /* stay within s32 */ + double scale = ldexp(1.0, R); + double max_val = ldexp(1.0, 30 - R); /* stay within s32 */ stats_t st_rnd, st_trunc; stats_reset(&st_rnd); stats_reset(&st_trunc); @@ -1620,18 +1651,18 @@ static void section_multiradix(void) { double aq = ay > 0 ? ax / ay : 1e30; /* Skip if inputs or quotient would overflow s32 at this radix */ if (ax >= max_val || ay >= max_val || aq >= max_val) continue; - s32 xfp = (s32)(x * scale); - s32 yfp = (s32)(y * scale); + s32 xfp = tofix(x, R); + s32 yfp = tofix(y, R); if (yfp == 0) continue; - double ref = x / y; + double ref = frd(xfp, R) / frd(yfp, R); s32 d_rnd = FR_DIV(xfp, R, yfp, R); s32 d_trunc = FR_DIV_TRUNC(xfp, R, yfp, R); double rd_rnd = frd(d_rnd, R); double rd_trunc = frd(d_trunc, R); - stats_add(&st_rnd, x / y, rd_rnd, ref); - stats_add(&st_trunc, x / y, rd_trunc, ref); + stats_add(&st_rnd, x / y, rd_rnd, ref, 1.0); + stats_add(&st_trunc, x / y, rd_trunc, ref, 1.0); } } @@ -1658,9 +1689,9 @@ static void section_multiradix(void) { }; for (int i = 0; i < (int)(sizeof(sign_cases)/sizeof(sign_cases[0])); i++) { int R = sign_cases[i].r; - double scale = (double)(1L << R); - s32 xfp = (s32)(sign_cases[i].x * scale); - s32 yfp = (s32)(sign_cases[i].y * scale); + double scale = ldexp(1.0, R); + s32 xfp = 
tofix(sign_cases[i].x, R); + s32 yfp = tofix(sign_cases[i].y, R); s32 d = FR_DIV(xfp, R, yfp, R); double rd = frd(d, R); double ref = sign_cases[i].x / sign_cases[i].y; @@ -1683,7 +1714,7 @@ static void section_multiradix(void) { int exp_radixes[] = {8, 12, 16, 20}; for (int ri = 0; ri < 4; ri++) { int R = exp_radixes[ri]; - double scale = (double)(1L << R); + double scale = ldexp(1.0, R); stats_t st_exp, st_pow10; stats_reset(&st_exp); stats_reset(&st_pow10); @@ -1691,23 +1722,25 @@ static void section_multiradix(void) { /* Sweep exp(x) for x in [-4, 4] in steps of 0.05 */ for (int i = -80; i <= 80; i++) { double x = i / 20.0; - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); + double actual_x = frd(fr, R); s32 r = FR_EXP(fr, R); double rd = frd(r, R); - double ref = exp(x); - if (r != FR_OVERFLOW_POS && ref < (double)(1L << (31 - R))) - stats_add(&st_exp, x, rd, ref); + double ref = exp(actual_x); + if (r != FR_OVERFLOW_POS && ref < ldexp(1.0, 31 - R)) + stats_add(&st_exp, actual_x, rd, ref, 32000.0); } /* Sweep pow10(x) for x in [-2, 2] in steps of 0.05 */ for (int i = -40; i <= 40; i++) { double x = i / 20.0; - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); + double actual_x = frd(fr, R); s32 r = FR_POW10(fr, R); double rd = frd(r, R); - double ref = pow(10.0, x); - if (r != FR_OVERFLOW_POS && ref < (double)(1L << (31 - R))) - stats_add(&st_pow10, x, rd, ref); + double ref = pow(10.0, actual_x); + if (r != FR_OVERFLOW_POS && ref < ldexp(1.0, 31 - R)) + stats_add(&st_pow10, actual_x, rd, ref, 32000.0); } double lsb = 1.0 / scale; @@ -1794,20 +1827,92 @@ static void section_summary(void) { * README.md, docs/README.md, and pages/index.html. * ============================================================ */ +/* ── Neighborhood printer ────────────────────────────────────────── + * Print ±K samples around a center index for any trig sweep. 
+ * func_type selects the function to evaluate: + * 0 = fr_sin_bam 1 = fr_cos_bam 2 = fr_tan_bam + * 3 = fr_sin 4 = fr_cos 5 = fr_tan + * 6 = FR_SinI 7 = FR_CosI 8 = FR_TanI + * 9 = fr_sin_deg 10 = fr_cos_deg 11 = fr_tan_deg + */ +static void neighborhood(const char *label, int func_type, + int center_i, int half, int N, + double range_lo, double range_hi) +{ + printf("\n**Neighborhood: %s (center i=%d ±%d)**\n\n", label, center_i, half); + printf("| i | deg | input_fp | expected | got | abs_err | pct_err |\n"); + printf("|---|---|---|---|---|---|---|\n"); + + for (int k = -half; k <= half; k++) { + int i = (center_i + k % N + N) % N; + double deg, angle, exp_v, got_v; + s32 fp; + + switch (func_type) { + case 0: case 1: case 2: { /* BAM: 0..65535 */ + u16 bam = (u16)i; + deg = bam * 360.0 / 65536.0; + angle = deg * M_PI / 180.0; + if (func_type == 0) { exp_v = q16(sin(angle)); got_v = frd(fr_sin_bam(bam), 16); } + else if (func_type == 1) { exp_v = q16(cos(angle)); got_v = frd(fr_cos_bam(bam), 16); } + else { exp_v = q16(tan_ref(angle)); got_v = frd(fr_tan_bam(bam), 16); } + fp = (s32)bam; + break; + } + case 3: case 4: case 5: { /* radian: ±2π, 131072 pts */ + angle = range_lo + (range_hi - range_lo) * i / (double)N; + fp = tofix(angle, 16); + double actual_angle = frd(fp, 16); + deg = actual_angle * 180.0 / M_PI; + if (func_type == 3) { exp_v = q16(sin(actual_angle)); got_v = frd(fr_sin(fp, 16), 16); } + else if (func_type == 4) { exp_v = q16(cos(actual_angle)); got_v = frd(fr_cos(fp, 16), 16); } + else { exp_v = q16(tan_ref(actual_angle)); got_v = frd(fr_tan(fp, 16), 16); } + break; + } + case 6: case 7: case 8: { /* integer degrees */ + int d = (int)range_lo + i; + deg = (double)d; + angle = d * M_PI / 180.0; + fp = (s32)d; + if (func_type == 6) { exp_v = q16(sin(angle)); got_v = frd(FR_SinI(d), 16); } + else if (func_type == 7) { exp_v = q16(cos(angle)); got_v = frd(FR_CosI(d), 16); } + else { exp_v = q16(tan_ref(angle)); got_v = frd(FR_TanI((s16)d), 16); } + 
break; + } + default: { /* fixed-radix degrees: ±360, 131072 pts */ + deg = range_lo + (range_hi - range_lo) * i / (double)N; + fp = tofix(deg, 16); + double actual_deg = frd(fp, 16); + angle = actual_deg * M_PI / 180.0; + if (func_type == 9) { exp_v = q16(sin(angle)); got_v = frd(FR_Sin(fp, 16), 16); } + else if (func_type == 10) { exp_v = q16(cos(angle)); got_v = frd(FR_Cos(fp, 16), 16); } + else { exp_v = q16(tan_ref(angle)); got_v = frd(FR_Tan(fp, 16), 16); } + break; + } + } + + double ae = fabs(got_v - exp_v); + double pe = (exp_v != 0.0) ? ae / fabs(exp_v) * 100.0 : (ae != 0.0 ? 100.0 : 0.0); + printf("| %d | %.6f | %d | %.6f | %.6f | %.6f | %.4f%% |\n", + i, deg, (int)fp, exp_v, got_v, ae, pe); + } + printf("\n"); +} + static void section_accuracy_table(void) { md_h2("14. Accuracy Summary Table"); printf("\n"); if (g_showpeak) { - printf("| Function | Max err (%%) | Avg err (%%) | Note | Peak at |\n"); + printf("| Function | Max err (%%)*| Avg err (%%) | Note | Peak at |\n"); printf("|---|---:|---:|---|---:|\n"); } else { - printf("| Function | Max err (%%) | Avg err (%%) | Note |\n"); + printf("| Function | Max err (%%)*| Avg err (%%) | Note |\n"); printf("|---|---:|---:|---|\n"); } const int R = 16; - const double scale = (double)(1L << R); + const double scale = ldexp(1.0, R); /* Persistent stats so we can print diagnostics after the table */ stats_t st_sincos, st_tan, st_asincos, st_atan2; @@ -1823,31 +1928,33 @@ static void section_accuracy_table(void) { for (int i = 0; i < 65536; i++) { u16 bam = (u16)i; double rad = bam * 2.0 * M_PI / 65536.0; - stats_add(&st, (double)bam, frd(fr_sin_bam(bam), FR_TRIG_OUT_PREC), q16(sin(rad))); - stats_add(&st, (double)bam, frd(fr_cos_bam(bam), FR_TRIG_OUT_PREC), q16(cos(rad))); + stats_add(&st, (double)bam, frd(fr_sin_bam(bam), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0); + stats_add(&st, (double)bam, frd(fr_cos_bam(bam), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0); } acc_row("sin/cos (BAM)", &st, "fr_sin_bam/fr_cos_bam 
direct; 129-entry table"); } - /* --- sin / cos (degree wrappers: 65536-pt) --- */ + /* --- sin / cos (degree wrappers: 65536-pt at s15.16) --- */ { stats_t &st = st_sincos; - const u16 radix = 7; /* s8.7 degrees: 128 steps/deg, [-256°,+256°) */ - for (int i = -32768; i <= 32767; i++) { - double deg = (double)i / (1 << radix); - double rad = deg * M_PI / 180.0; - stats_add(&st, deg, frd(FR_Sin((s16)i, radix), FR_TRIG_OUT_PREC), q16(sin(rad))); - stats_add(&st, deg, frd(FR_Cos((s16)i, radix), FR_TRIG_OUT_PREC), q16(cos(rad))); + const u16 radix = 16; + for (int i = 0; i < 65536; i++) { + double deg = -360.0 + (720.0 * i / 65536.0); + s32 deg_fp = tofix(deg, radix); + double actual_deg = frd(deg_fp, radix); + double rad = actual_deg * M_PI / 180.0; + stats_add(&st, actual_deg, frd(FR_Sin(deg_fp, radix), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0); + stats_add(&st, actual_deg, frd(FR_Cos(deg_fp, radix), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0); } s16 specials[] = {0,30,45,60,90,120,135,150,180,210,225,240,270,300,315,330,360, -30,-45,-60,-90,-120,-135,-150,-180,-210,-225,-240,-270,-300,-315,-330,-360}; for (int si = 0; si < (int)(sizeof(specials)/sizeof(specials[0])); si++) { s16 d = specials[si]; double rad = d * M_PI / 180.0; - stats_add(&st, d, frd(FR_SinI(d), FR_TRIG_OUT_PREC), q16(sin(rad))); - stats_add(&st, d, frd(FR_CosI(d), FR_TRIG_OUT_PREC), q16(cos(rad))); + stats_add(&st, d, frd(FR_SinI(d), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0); + stats_add(&st, d, frd(FR_CosI(d), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0); } - acc_row("sin/cos (deg)", &st, "FR_Sin/FR_Cos ±256° (s16 at radix 7; FR_DEG2BAM)"); + acc_row("sin/cos (deg)", &st, "FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM"); } /* --- sin / cos (radian wrappers: 65536-pt) --- */ @@ -1855,9 +1962,10 @@ static void section_accuracy_table(void) { stats_t st; stats_reset(&st); for (int i = 0; i < 65536; i++) { double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0); - s32 rad_fp = (s32)(angle * (1L << 16)); - stats_add(&st, 
angle, frd(fr_sin(rad_fp, 16), FR_TRIG_OUT_PREC), q16(sin(angle))); - stats_add(&st, angle, frd(fr_cos(rad_fp, 16), FR_TRIG_OUT_PREC), q16(cos(angle))); + s32 rad_fp = tofix(angle, 16); + double actual_angle = frd(rad_fp, 16); + stats_add(&st, actual_angle, frd(fr_sin(rad_fp, 16), FR_TRIG_OUT_PREC), q16(sin(actual_angle)), 1.0); + stats_add(&st, actual_angle, frd(fr_cos(rad_fp, 16), FR_TRIG_OUT_PREC), q16(cos(actual_angle)), 1.0); } acc_row("sin/cos (rad)", &st, "fr_sin/fr_cos via fr_rad_to_bam ±2π r16"); } @@ -1871,27 +1979,29 @@ static void section_accuracy_table(void) { if (bam == 16384) ref = TAN_CLAMP; /* 90°: +maxint */ else if (bam == 49152) ref = -TAN_CLAMP; /* 270°: -maxint */ else ref = tan_ref(bam * 2.0 * M_PI / 65536.0); - stats_add(&st, (double)bam, frd(fr_tan_bam(bam), FR_TRIG_OUT_PREC), q16(ref)); + stats_add(&st, (double)bam, frd(fr_tan_bam(bam), FR_TRIG_OUT_PREC), q16(ref), TAN_CLAMP); } acc_row("tan (BAM)", &st, "fr_tan_bam 65536-pt full; ±maxint at poles"); } - /* --- tan (degree wrappers: 65536-pt, full sweep) --- */ + /* --- tan (degree wrappers: 65536-pt at s15.16, full sweep) --- */ { stats_t &st = st_tan; - const u16 radix = 7; - for (int i = -32768; i <= 32767; i++) { - double deg = (double)i / (1 << radix); - double rad = deg * M_PI / 180.0; - stats_add(&st, deg, frd(FR_Tan((s16)i, radix), FR_TRIG_OUT_PREC), q16(tan_ref(rad))); + const u16 radix = 16; + for (int i = 0; i < 65536; i++) { + double deg = -360.0 + (720.0 * i / 65536.0); + s32 deg_fp = tofix(deg, radix); + double actual_deg = frd(deg_fp, radix); + double rad = actual_deg * M_PI / 180.0; + stats_add(&st, actual_deg, frd(FR_Tan(deg_fp, radix), FR_TRIG_OUT_PREC), q16(tan_ref(rad)), TAN_CLAMP); } s16 specials[] = {0,30,45,60,-30,-45,-60,120,135,150,-120,-135,-150}; for (int si = 0; si < (int)(sizeof(specials)/sizeof(specials[0])); si++) { s16 d = specials[si]; double rad = d * M_PI / 180.0; - stats_add(&st, d, frd(FR_TanI(d), FR_TRIG_OUT_PREC), q16(tan_ref(rad))); + stats_add(&st, 
d, frd(FR_TanI(d), FR_TRIG_OUT_PREC), q16(tan_ref(rad)), TAN_CLAMP); } - acc_row("tan (deg)", &st, "FR_Tan ±256° full (s16 at radix 7; FR_DEG2BAM); sat at poles"); + acc_row("tan (deg)", &st, "FR_Tan ±360° s15.16 full; sat at poles"); } /* --- tan (radian wrappers: 65536-pt, full sweep) --- */ @@ -1899,8 +2009,9 @@ static void section_accuracy_table(void) { stats_t st; stats_reset(&st); for (int i = 0; i < 65536; i++) { double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0); - s32 rad_fp = (s32)(angle * (1L << 16)); - stats_add(&st, angle, frd(fr_tan(rad_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(angle))); + s32 rad_fp = tofix(angle, 16); + double actual_angle = frd(rad_fp, 16); + stats_add(&st, actual_angle, frd(fr_tan(rad_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(actual_angle)), TAN_CLAMP); } acc_row("tan (rad)", &st, "fr_tan ±2π r16 full; sat at poles"); } @@ -1910,12 +2021,12 @@ static void section_accuracy_table(void) { stats_t &st = st_asincos; /* 65536-point sweep: all representable values at radix 15 over [-1, +1) */ for (int i = -32768; i <= 32767; i++) { - double xd = (double)i / (1 << 15); + double xd = (double)i / 32768.0; if (xd < -1.0 || xd > 1.0) continue; s32 rad = FR_asin((s32)i, 15, R); - stats_add(&st, xd, frd(rad, R), q16(asin(xd))); + stats_add(&st, xd, frd(rad, R), q16(asin(xd)), M_PI); rad = FR_acos((s32)i, 15, R); - stats_add(&st, xd, frd(rad, R), q16(acos(xd))); + stats_add(&st, xd, frd(rad, R), q16(acos(xd)), M_PI); } acc_row("asin / acos", &st, "65536-pt; sqrt approx near boundary"); } @@ -1936,19 +2047,19 @@ static void section_accuracy_table(void) { for (int i = -32767; i <= 32768; i++) { double angle = i * M_PI / 32768.0; double x = rad * cos(angle), y = rad * sin(angle); - s32 fx = (s32)(x * scale); - s32 fy = (s32)(y * scale); + s32 fx = tofix(x, R); + s32 fy = tofix(y, R); if (fx == 0 && fy == 0) continue; s32 afx = (fx < 0) ? -fx : fx; s32 afy = (fy < 0) ? -fy : fy; s32 minor = (afx < afy) ? 
afx : afy; if (minor < 256) continue; /* input quantization, not algo */ s32 r = FR_atan2(fy, fx, R); - double ref = atan2(y, x); + double ref = atan2((double)fy, (double)fx); /* Skip near ±pi branch cut: sign depends on sub-LSB * input quantization, not algorithm accuracy. */ if (fabs(fabs(ref) - M_PI) < 0.01) continue; - stats_add(&st, angle * 180.0 / M_PI, frd(r, R), q16(ref)); + stats_add(&st, angle * 180.0 / M_PI, frd(r, R), q16(ref), M_PI); } } /* Special cases: exact quadrant/octant/30-degree angles */ @@ -1957,10 +2068,10 @@ static void section_accuracy_table(void) { for (int si = 0; si < (int)(sizeof(specials_deg)/sizeof(specials_deg[0])); si++) { double angle = specials_deg[si] * M_PI / 180.0; double x = 100.0 * cos(angle), y = 100.0 * sin(angle); - s32 fx = (s32)(x * scale), fy = (s32)(y * scale); + s32 fx = tofix(x, R), fy = tofix(y, R); if (fx == 0 && fy == 0) continue; s32 r = FR_atan2(fy, fx, R); - stats_add(&st, specials_deg[si], frd(r, R), q16(atan2(y, x))); + stats_add(&st, specials_deg[si], frd(r, R), q16(atan2((double)fy, (double)fx)), M_PI); } acc_row("atan2", &st, "65536x5 radii; asin/acos+hypot_fast8"); } @@ -1970,10 +2081,11 @@ static void section_accuracy_table(void) { stats_t st; stats_reset(&st); for (int i = -10000; i <= 10000; i++) { double x = i / 1000.0; - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); + double actual_x = frd(fr, R); s32 r = FR_atan(fr, (u16)R, (u16)R); - double ref = atan(x); - stats_add(&st, x, frd(r, R), q16(ref)); + double ref = atan(actual_x); + stats_add(&st, actual_x, frd(r, R), q16(ref), M_PI / 2.0); } acc_row("atan", &st, "20001-pt full sweep [-10,10]; via FR_atan2"); } @@ -1983,16 +2095,18 @@ static void section_accuracy_table(void) { stats_t st; stats_reset(&st); double inputs[] = {0.0001, 0.25, 0.5, 1, 2, 3, 4, 7, 9, 16, 25, 100, 1024, 10000, 32000}; for (int i = 0; i < (int)(sizeof(inputs)/sizeof(inputs[0])); i++) { - s32 fr = (s32)(inputs[i] * scale); + s32 fr = tofix(inputs[i], R); + double actual_x 
= frd(fr, R); s32 r = FR_sqrt(fr, R); - stats_add(&st, inputs[i], frd(r, R), q16(sqrt(inputs[i]))); + stats_add(&st, actual_x, frd(r, R), q16(sqrt(actual_x)), sqrt(32000.0)); } /* Fine sweep */ for (int i = 1; i <= 1000; i++) { double x = i * 10.0; - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); + double actual_x = frd(fr, R); s32 r = FR_sqrt(fr, R); - stats_add(&st, x, frd(r, R), q16(sqrt(x))); + stats_add(&st, actual_x, frd(r, R), q16(sqrt(actual_x)), sqrt(32000.0)); } acc_row("sqrt", &st, "Round-to-nearest"); } @@ -2002,18 +2116,20 @@ static void section_accuracy_table(void) { stats_t st; stats_reset(&st); /* Integer inputs — stay within s32 range at radix 16 (max ~32767) */ for (int v = 1; v <= 32000; v += (v < 100 ? 1 : v / 10)) { - s32 fr = (s32)((double)v * scale); + s32 fr = tofix((double)v, R); if (fr <= 0) continue; + double actual_v = frd(fr, R); s32 r = FR_log2(fr, (u16)R, (u16)R); - stats_add(&st, (double)v, frd(r, R), q16(log2((double)v))); + stats_add(&st, actual_v, frd(r, R), q16(log2(actual_v)), log2(32000.0)); } /* Fractional sweep 0.125 .. 
1.0 */ for (int i = 1; i <= 100; i++) { double x = 0.125 + (0.875 * i / 100.0); - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); if (fr <= 0) continue; + double actual_x = frd(fr, R); s32 r = FR_log2(fr, (u16)R, (u16)R); - stats_add(&st, x, frd(r, R), q16(log2(x))); + stats_add(&st, actual_x, frd(r, R), q16(log2(actual_x)), log2(32000.0)); } acc_row("log2", &st, "65-entry mantissa table"); } @@ -2023,10 +2139,11 @@ static void section_accuracy_table(void) { stats_t st; stats_reset(&st); for (int i = -800; i <= 800; i++) { double x = i / 100.0; - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); + double actual_x = frd(fr, R); s32 r = FR_pow2(fr, R); - double ref = pow(2.0, x); - stats_add(&st, x, frd(r, R), q16(ref)); + double ref = pow(2.0, actual_x); + stats_add(&st, actual_x, frd(r, R), q16(ref), pow(2.0, 8.0)); } acc_row("pow2", &st, "65-entry fraction table"); } @@ -2036,14 +2153,15 @@ static void section_accuracy_table(void) { stats_t st; stats_reset(&st); double inputs[] = {0.125, 0.25, 0.5, 1, 2, M_E, 3, 4, 5, 7, 8, 10, 20, 50, 100, 1000}; for (int i = 0; i < (int)(sizeof(inputs)/sizeof(inputs[0])); i++) { - s32 fr = (s32)(inputs[i] * scale); + s32 fr = tofix(inputs[i], R); if (fr <= 0) continue; + double actual_x = frd(fr, R); s32 r = FR_ln(fr, R, R); - double ref = log(inputs[i]); - stats_add(&st, inputs[i], frd(r, R), q16(ref)); + double ref = log(actual_x); + stats_add(&st, actual_x, frd(r, R), q16(ref), log(32000.0)); r = FR_log10(fr, R, R); - ref = log10(inputs[i]); - stats_add(&st, inputs[i], frd(r, R), q16(ref)); + ref = log10(actual_x); + stats_add(&st, actual_x, frd(r, R), q16(ref), log10(32000.0)); } acc_row("ln, log10", &st, "Via FR_MULK28 from log2"); } @@ -2053,11 +2171,12 @@ static void section_accuracy_table(void) { stats_t st; stats_reset(&st); for (int i = -400; i <= 400; i++) { double x = i / 100.0; - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); + double actual_x = frd(fr, R); s32 r = FR_EXP(fr, R); - double ref = exp(x); + 
double ref = exp(actual_x); if (ref > 32000.0 || ref < 1e-6) continue; /* skip overflow/underflow */ - stats_add(&st, x, frd(r, R), q16(ref)); + stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0); } acc_row("exp", &st, "FR_MULK28 + FR_pow2"); } @@ -2067,11 +2186,12 @@ static void section_accuracy_table(void) { stats_t st; stats_reset(&st); for (int i = -400; i <= 400; i++) { double x = i / 100.0; - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); + double actual_x = frd(fr, R); s32 r = FR_EXP_FAST(fr, R); - double ref = exp(x); + double ref = exp(actual_x); if (ref > 32000.0 || ref < 1e-6) continue; - stats_add(&st, x, frd(r, R), q16(ref)); + stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0); } acc_row("exp_fast", &st, "Shift-only scaling"); } @@ -2081,11 +2201,12 @@ static void section_accuracy_table(void) { stats_t st; stats_reset(&st); for (int i = -200; i <= 200; i++) { double x = i / 100.0; - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); + double actual_x = frd(fr, R); s32 r = FR_POW10(fr, R); - double ref = pow(10.0, x); + double ref = pow(10.0, actual_x); if (ref > 32000.0 || ref < 1e-6) continue; - stats_add(&st, x, frd(r, R), q16(ref)); + stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0); } acc_row("pow10", &st, "FR_MULK28 + FR_pow2"); } @@ -2095,11 +2216,12 @@ static void section_accuracy_table(void) { stats_t st; stats_reset(&st); for (int i = -200; i <= 200; i++) { double x = i / 100.0; - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); + double actual_x = frd(fr, R); s32 r = FR_POW10_FAST(fr, R); - double ref = pow(10.0, x); + double ref = pow(10.0, actual_x); if (ref > 32000.0 || ref < 1e-6) continue; - stats_add(&st, x, frd(r, R), q16(ref)); + stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0); } acc_row("pow10_fast", &st, "Shift-only scaling"); } @@ -2112,11 +2234,12 @@ static void section_accuracy_table(void) { {1,1},{0.5,0.5},{100,100},{1000,1},{1,1000} }; for (int i = 0; i < (int)(sizeof(cases)/sizeof(cases[0])); i++) { 
- s32 fx = (s32)(cases[i].x * scale); - s32 fy = (s32)(cases[i].y * scale); + s32 fx = tofix(cases[i].x, R); + s32 fy = tofix(cases[i].y, R); + double actual_x = frd(fx, R), actual_y = frd(fy, R); s32 r = FR_hypot(fx, fy, R); - double ref = hypot(cases[i].x, cases[i].y); - stats_add(&st, ref, frd(r, R), q16(ref)); + double ref = hypot(actual_x, actual_y); + stats_add(&st, ref, frd(r, R), q16(ref), hypot(1000.0, 1000.0)); } acc_row("hypot (exact)", &st, "64-bit intermediate"); } @@ -2129,17 +2252,18 @@ static void section_accuracy_table(void) { {100,100},{1000,1},{1,1000},{7,24},{20,21} }; for (int i = 0; i < (int)(sizeof(cases)/sizeof(cases[0])); i++) { - s32 fx = (s32)(cases[i].x * scale); - s32 fy = (s32)(cases[i].y * scale); + s32 fx = tofix(cases[i].x, R); + s32 fy = tofix(cases[i].y, R); + double actual_x = frd(fx, R), actual_y = frd(fy, R); s32 r = FR_hypot_fast8(fx, fy); - double ref = hypot(cases[i].x, cases[i].y); - if (ref > 0) stats_add(&st, ref, frd(r, R), q16(ref)); + double ref = hypot(actual_x, actual_y); + if (ref > 0) stats_add(&st, ref, frd(r, R), q16(ref), hypot(1000.0, 1000.0)); } acc_row("hypot_fast8 (8-seg)", &st, "Shift-only, no multiply"); } printf("\n"); - printf("\n"); + printf("\n*Relative error; reference clamped to 1%% of full-scale output.\n\n"); /* ── Test-only rows (not library functions — conversion & pipeline checks) ── */ md_h3("14.0.1 Conversion & pipeline accuracy (test-only)"); @@ -2151,7 +2275,7 @@ static void section_accuracy_table(void) { stats_t &st = st_rad2bam; for (int i = 0; i < 65536; i++) { double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0); - s32 rad_fp = (s32)(angle * scale); + s32 rad_fp = tofix(angle, R); u16 got = fr_rad_to_bam(rad_fp, 16); /* Exact BAM: wrap to u16 */ double exact_bam_d = angle * 65536.0 / (2.0 * M_PI); @@ -2160,7 +2284,7 @@ static void section_accuracy_table(void) { /* Feed stats as degrees so the error is interpretable */ double got_deg = got * (360.0 / 65536.0); double exp_deg = expected 
* (360.0 / 65536.0); - stats_add(&st, angle, got_deg, exp_deg); + stats_add(&st, angle, got_deg, exp_deg, 360.0); } { char note[128]; @@ -2176,7 +2300,7 @@ static void section_accuracy_table(void) { stats_t &st = st_deg2bam; for (int i = 0; i < 65536; i++) { double deg = -360.0 + (720.0 * i / 65536.0); - s32 deg_fp = (s32)(deg * scale); + s32 deg_fp = tofix(deg, R); u16 got = fr_deg_to_bam(deg_fp, 16); /* Exact BAM: wrap to u16 */ double exact_bam_d = deg * 65536.0 / 360.0; @@ -2184,7 +2308,7 @@ static void section_accuracy_table(void) { u16 expected = (u16)(exact_bam_s & 0xFFFF); double got_deg = got * (360.0 / 65536.0); double exp_deg = expected * (360.0 / 65536.0); - stats_add(&st, deg, got_deg, exp_deg); + stats_add(&st, deg, got_deg, exp_deg, 360.0); } { char note[128]; @@ -2200,10 +2324,10 @@ static void section_accuracy_table(void) { stats_t &st = st_sincos_deg_s32; for (int deg = -360; deg <= 360; deg++) { double rad = deg * M_PI / 180.0; - stats_add(&st, (double)deg, frd(fr_sin_deg(deg), FR_TRIG_OUT_PREC), q16(sin(rad))); - stats_add(&st, (double)deg, frd(fr_cos_deg(deg), FR_TRIG_OUT_PREC), q16(cos(rad))); + stats_add(&st, (double)deg, frd(FR_SinI(deg), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0); + stats_add(&st, (double)deg, frd(FR_CosI(deg), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0); } - acc_row("sin/cos (int deg)", &st, "fr_sin_deg/fr_cos_deg ±360° integer degrees"); + acc_row("sin/cos (int deg)", &st, "FR_SinI/FR_CosI ±360° integer degrees"); } /* --- tan via integer degrees ±360° --- */ @@ -2211,7 +2335,7 @@ static void section_accuracy_table(void) { stats_t &st = st_tan_deg_s32; for (int deg = -360; deg <= 360; deg++) { double rad = deg * M_PI / 180.0; - stats_add(&st, (double)deg, frd(FR_TanI((s16)deg), FR_TRIG_OUT_PREC), q16(tan_ref(rad))); + stats_add(&st, (double)deg, frd(FR_TanI((s16)deg), FR_TRIG_OUT_PREC), q16(tan_ref(rad)), TAN_CLAMP); } acc_row("tan (int deg)", &st, "FR_TanI ±360° full; sat at poles"); } @@ -2223,14 +2347,14 @@ static void 
section_accuracy_table(void) { stats_t st; stats_reset(&st); for (int i = 0; i < 65536; i++) { double angle = -M_PI + (2.0 * M_PI * i / 65536.0); - s32 rad_fp = (s32)(angle * scale); + s32 rad_fp = tofix(angle, R); s32 raw = FR_RAD2BAM(rad_fp); u16 got = (u16)((raw + (1 << 15)) >> 16); double exact_d = angle * 65536.0 / (2.0 * M_PI); u16 expected = (u16)((s32)floor(exact_d + 0.5) & 0xFFFF); double got_deg = got * (360.0 / 65536.0); double exp_deg = expected * (360.0 / 65536.0); - stats_add(&st, angle, got_deg, exp_deg); + stats_add(&st, angle, got_deg, exp_deg, 360.0); } acc_row("FR_RAD2BAM macro", &st, "Shift-approx ±π at r16; overflows beyond ±4 rad"); } @@ -2240,14 +2364,14 @@ static void section_accuracy_table(void) { stats_t st; stats_reset(&st); const u16 radix = 7; for (int i = -23040; i <= 23040; i++) { /* ±180° at r7 = ±23040 */ - double deg = (double)i / (1 << radix); + double deg = (double)i / 128.0; s32 raw = FR_DEG2BAM((s32)i); u16 got = (u16)((raw + (1 << (radix - 1))) >> radix); double exact_d = deg * 65536.0 / 360.0; u16 expected = (u16)((s32)floor(exact_d + 0.5) & 0xFFFF); double got_deg = got * (360.0 / 65536.0); double exp_deg = expected * (360.0 / 65536.0); - stats_add(&st, deg, got_deg, exp_deg); + stats_add(&st, deg, got_deg, exp_deg, 360.0); } acc_row("FR_DEG2BAM macro", &st, "Shift-approx ±180° at r7; overflows beyond ±256°"); } @@ -2261,7 +2385,7 @@ static void section_accuracy_table(void) { s32 rad_fp = FR_BAM2RAD(bam_r16); double got_rad = frd(rad_fp, 16); double exp_rad = (double)i * 2.0 * M_PI / 65536.0; - stats_add(&st, (double)i, got_rad, exp_rad); + stats_add(&st, (double)i, got_rad, exp_rad, 2.0 * M_PI); } acc_row("FR_BAM2RAD macro", &st, "BAM→rad r16 full (0..32767; <<16 overflow above)"); } @@ -2275,7 +2399,7 @@ static void section_accuracy_table(void) { s32 deg_fp = FR_BAM2DEG(bam_r16); double got_deg = frd(deg_fp, 16); double exp_deg = (double)i * 360.0 / 65536.0; - stats_add(&st, (double)i, got_deg, exp_deg); + stats_add(&st, 
(double)i, got_deg, exp_deg, 360.0); } acc_row("FR_BAM2DEG macro", &st, "BAM→deg r16 full (0..32767; <<16 overflow above)"); } @@ -2285,11 +2409,11 @@ static void section_accuracy_table(void) { stats_t st; stats_reset(&st); for (int i = 0; i < 65536; i++) { double deg = -360.0 + (720.0 * i / 65536.0); - s32 deg_fp = (s32)(deg * scale); + s32 deg_fp = tofix(deg, R); s32 rad_fp = FR_DEG2RAD(deg_fp); double got_rad = frd(rad_fp, 16); double exp_rad = deg * M_PI / 180.0; - stats_add(&st, deg, got_rad, exp_rad); + stats_add(&st, deg, got_rad, exp_rad, 2.0 * M_PI); } acc_row("FR_DEG2RAD macro", &st, "65536-pt ±360° r16 full"); } @@ -2299,11 +2423,11 @@ static void section_accuracy_table(void) { stats_t st; stats_reset(&st); for (int i = 0; i < 65536; i++) { double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0); - s32 rad_fp = (s32)(angle * scale); + s32 rad_fp = tofix(angle, R); s32 deg_fp = FR_RAD2DEG(rad_fp); double got_deg = frd(deg_fp, 16); double exp_deg = angle * 180.0 / M_PI; - stats_add(&st, angle, got_deg, exp_deg); + stats_add(&st, angle, got_deg, exp_deg, 360.0); } acc_row("FR_RAD2DEG macro", &st, "65536-pt ±2π r16 full"); } @@ -2337,6 +2461,325 @@ static void section_accuracy_table(void) { s->max_pct_err); } printf("\n"); + + /* ── 14.3 Per-function trig sweep table ──────────────────────────── + * One row per public entry point. Each function is swept + * independently over its full domain so that peak abs / pct errors + * are attributable to a single function, not a combined aggregate. + * + * Peak pct err is raw |err|/|expected|*100 — no clamping. Near + * zero crossings (sin≈0, cos≈0, asin(0)≈0) the denominator is + * tiny and pct blows up even when abs err is sub-LSB. The Notes + * column flags these rows. Use Peak abs err and Mean abs err to + * judge accuracy at zero crossings; use Peak pct err elsewhere. 
+ */ + md_h3("14.2 Neighborhoods (peak error ±10 samples)"); + + /* fr_sin radian at i=0 (-360°) — zero crossing neighborhood */ + neighborhood("fr_sin radian @ -360 deg (i=0)", 3, 0, 10, 131072, + -2.0 * M_PI, 2.0 * M_PI); + + md_h3("14.3 Per-function trig sweep"); + + printf("| Function | Input | Range start | Range end | Points | Increment | " + "Peak abs err | @abs_err | Peak pct err | @pct_err | Expected | Got | Mean abs err | Notes |\n"); + printf("|---|---|---:|---:|---:|---|---:|---:|---:|---:|---:|---:|---:|---|\n"); + + /* Helper: print one row of the per-function table */ + #define SWEEP_ROW(name, sig, rlo, rhi, pts, step, st, note) \ + printf("| %s | %s | %s | %s | %d | %s | %f | %.4f | %.4f%% | %.4f | %f | %f | %f | %s |\n", \ + name, sig, rlo, rhi, pts, step, \ + (st).max_abs_err, (st).worst_input, (st).max_pct_err, \ + (st).worst_pct_input, (st).worst_pct_expected, (st).worst_pct_actual, \ + stats_mean(&(st)), note) + + /* fr_sin_bam */ + { + stats_t st; stats_reset(&st); + for (int b = 0; b < 65536; b++) { + u16 bam = (u16)b; + double rad = bam * 2.0 * M_PI / 65536.0; + double deg = bam * 360.0 / 65536.0; + stats_add(&st, deg, frd(fr_sin_bam(bam), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0); + } + SWEEP_ROW("fr_sin_bam", "(u16 bam)", "0", "360", 65536, "0.0055 deg", st, ""); + } + /* fr_cos_bam */ + { + stats_t st; stats_reset(&st); + for (int b = 0; b < 65536; b++) { + u16 bam = (u16)b; + double rad = bam * 2.0 * M_PI / 65536.0; + double deg = bam * 360.0 / 65536.0; + stats_add(&st, deg, frd(fr_cos_bam(bam), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0); + } + SWEEP_ROW("fr_cos_bam", "(u16 bam)", "0", "360", 65536, "0.0055 deg", st, ""); + } + /* fr_tan_bam */ + { + stats_t st; stats_reset(&st); + for (int b = 0; b < 65536; b++) { + u16 bam = (u16)b; + double rad = bam * 2.0 * M_PI / 65536.0; + double deg = bam * 360.0 / 65536.0; + double ref; + if (bam == 16384) ref = TAN_CLAMP; + else if (bam == 49152) ref = -TAN_CLAMP; + else ref = q16(tan_ref(rad)); + 
stats_add(&st, deg, frd(fr_tan_bam(bam), FR_TRIG_OUT_PREC), ref, TAN_CLAMP); + } + SWEEP_ROW("fr_tan_bam", "(u16 bam)", "0", "360", 65536, "0.0055 deg", st, "pole clamped"); + } + /* fr_sin (radian) */ + { + stats_t st; stats_reset(&st); + const int N2 = 131072; + for (int i = 0; i < N2; i++) { + double angle = -2.0 * M_PI + (4.0 * M_PI * i / (double)N2); + s32 rad_fp = tofix(angle, 16); + double actual_angle = frd(rad_fp, 16); + double deg = actual_angle * 180.0 / M_PI; + stats_add(&st, deg, frd(fr_sin(rad_fp, 16), FR_TRIG_OUT_PREC), q16(sin(actual_angle)), 1.0); + } + SWEEP_ROW("fr_sin", "(s32 rad, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "near-π small-angle bypass"); + } + /* fr_cos (radian) */ + { + stats_t st; stats_reset(&st); + const int N2 = 131072; + for (int i = 0; i < N2; i++) { + double angle = -2.0 * M_PI + (4.0 * M_PI * i / (double)N2); + s32 rad_fp = tofix(angle, 16); + double actual_angle = frd(rad_fp, 16); + double deg = actual_angle * 180.0 / M_PI; + stats_add(&st, deg, frd(fr_cos(rad_fp, 16), FR_TRIG_OUT_PREC), q16(cos(actual_angle)), 1.0); + } + SWEEP_ROW("fr_cos", "(s32 rad, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, ""); + } + /* fr_tan (radian) */ + { + stats_t st; stats_reset(&st); + const int N2 = 131072; + for (int i = 0; i < N2; i++) { + double angle = -2.0 * M_PI + (4.0 * M_PI * i / (double)N2); + s32 rad_fp = tofix(angle, 16); + double actual_angle = frd(rad_fp, 16); + double deg = actual_angle * 180.0 / M_PI; + stats_add(&st, deg, frd(fr_tan(rad_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(actual_angle)), TAN_CLAMP); + } + SWEEP_ROW("fr_tan", "(s32 rad, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "near-π bypass; s64 lerp near poles"); + } + /* FR_SinI */ + { + stats_t st; stats_reset(&st); + for (int d = -360; d <= 360; d++) { + double rad = d * M_PI / 180.0; + stats_add(&st, (double)d, frd(FR_SinI(d), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0); + } + SWEEP_ROW("FR_SinI", "(s16 deg)", "-360", "+360", 721, "1 
deg", st, ""); + } + /* FR_CosI */ + { + stats_t st; stats_reset(&st); + for (int d = -360; d <= 360; d++) { + double rad = d * M_PI / 180.0; + stats_add(&st, (double)d, frd(FR_CosI(d), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0); + } + SWEEP_ROW("FR_CosI", "(s16 deg)", "-360", "+360", 721, "1 deg", st, ""); + } + /* FR_TanI */ + { + stats_t st; stats_reset(&st); + for (int d = -360; d <= 360; d++) { + double rad = d * M_PI / 180.0; + double ref; + if (d % 180 == 90 || d % 180 == -90) + ref = (d > 0) ? TAN_CLAMP : -TAN_CLAMP; + else + ref = q16(tan_ref(rad)); + stats_add(&st, (double)d, frd(FR_TanI((s16)d), FR_TRIG_OUT_PREC), ref, TAN_CLAMP); + } + SWEEP_ROW("FR_TanI", "(s16 deg)", "-360", "+360", 721, "1 deg", st, "pole clamped"); + } + /* fr_sin_deg (fixed-radix degrees, radix 16) */ + { + stats_t st; stats_reset(&st); + const int N2 = 131072; + for (int i = 0; i < N2; i++) { + double deg = -360.0 + 720.0 * i / (double)N2; + s32 deg_fp = tofix(deg, 16); + double actual_deg = frd(deg_fp, 16); + double rad = actual_deg * M_PI / 180.0; + stats_add(&st, actual_deg, frd(FR_Sin(deg_fp, 16), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0); + } + SWEEP_ROW("fr_sin_deg", "(s32 deg, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "pct peak at sin=0 crossing"); + } + /* fr_cos_deg (fixed-radix degrees, radix 16) */ + { + stats_t st; stats_reset(&st); + const int N2 = 131072; + for (int i = 0; i < N2; i++) { + double deg = -360.0 + 720.0 * i / (double)N2; + s32 deg_fp = tofix(deg, 16); + double actual_deg = frd(deg_fp, 16); + double rad = actual_deg * M_PI / 180.0; + stats_add(&st, actual_deg, frd(FR_Cos(deg_fp, 16), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0); + } + SWEEP_ROW("fr_cos_deg", "(s32 deg, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "near-90/270 small-angle bypass"); + } + /* fr_tan_deg (fixed-radix degrees, radix 16) */ + { + stats_t st; stats_reset(&st); + const int N2 = 131072; + for (int i = 0; i < N2; i++) { + double deg = -360.0 + 720.0 * i / (double)N2; + 
s32 deg_fp = tofix(deg, 16); + double actual_deg = frd(deg_fp, 16); + double rad = actual_deg * M_PI / 180.0; + stats_add(&st, actual_deg, frd(FR_Tan(deg_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(rad)), TAN_CLAMP); + } + SWEEP_ROW("fr_tan_deg", "(s32 deg, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "pct peak near tan pole"); + } + + /* --- Inverse Trig --- */ + + /* FR_acos */ + { + stats_t st; stats_reset(&st); + const int N = 65537; + for (int i = 0; i < N; i++) { + double xd = -1.0 + 2.0 * i / (double)(N - 1); + s32 fr = tofix(xd, 15); + double actual_xd = frd(fr, 15); + s32 rad = FR_acos(fr, 15, 16); + stats_add(&st, actual_xd, frd(rad, 16), q16(acos(actual_xd)), M_PI); + } + SWEEP_ROW("FR_acos", "(s32,u16 15,u16 16)", "-1.0", "+1.0", N, "3.05e-5", st, "r15 in, r16 out"); + } + /* FR_asin */ + { + stats_t st; stats_reset(&st); + const int N = 65537; + for (int i = 0; i < N; i++) { + double xd = -1.0 + 2.0 * i / (double)(N - 1); + s32 fr = tofix(xd, 15); + double actual_xd = frd(fr, 15); + s32 rad = FR_asin(fr, 15, 16); + stats_add(&st, actual_xd, frd(rad, 16), q16(asin(actual_xd)), M_PI); + } + SWEEP_ROW("FR_asin", "(s32,u16 15,u16 16)", "-1.0", "+1.0", N, "3.05e-5", st, "r15 in, r16 out; pct peak at asin(0)=0"); + } + /* FR_atan */ + { + stats_t st; stats_reset(&st); + const int N = 131072; + for (int i = 0; i < N; i++) { + double xd = -10.0 + 20.0 * i / (double)N; + s32 fr = tofix(xd, 16); + double actual_xd = frd(fr, 16); + s32 rad = FR_atan(fr, 16, 16); + stats_add(&st, actual_xd, frd(rad, 16), q16(atan(actual_xd)), M_PI / 2.0); + } + SWEEP_ROW("FR_atan", "(s32,u16 16,u16 16)", "-10.0", "+10.0", N, "1.53e-4", st, "r16 in/out"); + } + /* FR_atan2 — unit circle sweep */ + { + stats_t st; stats_reset(&st); + const int N = 65536; + for (int i = 0; i < N; i++) { + double angle = -M_PI + 2.0 * M_PI * i / (double)N; + double deg = angle * 180.0 / M_PI; + s32 x = tofix(cos(angle), 15); + s32 y = tofix(sin(angle), 15); + s32 rad = FR_atan2(y, x, 16); + 
stats_add(&st, deg, frd(rad, 16), q16(atan2((double)y, (double)x)), M_PI); + } + SWEEP_ROW("FR_atan2", "(s32 y,s32 x,u16 16)", "-180", "+180", N, "0.0055 deg", st, "unit circle r15"); + } + + /* --- Log / Exp --- */ + + /* FR_log2 */ + { + stats_t st; stats_reset(&st); + const int N = 65536; + for (int i = 1; i <= N; i++) { + double xd = 0.01 + (256.0 - 0.01) * i / (double)N; + s32 fr = tofix(xd, 16); + double actual_xd = frd(fr, 16); + s32 r = FR_log2(fr, 16, 16); + stats_add(&st, actual_xd, frd(r, 16), q16(log2(actual_xd)), log2(32000.0)); + } + SWEEP_ROW("FR_log2", "(s32,u16 16,u16 16)", "0.01", "256", N, "0.0039", st, "r16 in/out"); + } + /* FR_ln */ + { + stats_t st; stats_reset(&st); + const int N = 65536; + for (int i = 1; i <= N; i++) { + double xd = 0.01 + (256.0 - 0.01) * i / (double)N; + s32 fr = tofix(xd, 16); + double actual_xd = frd(fr, 16); + s32 r = FR_ln(fr, 16, 16); + stats_add(&st, actual_xd, frd(r, 16), q16(log(actual_xd)), log(32000.0)); + } + SWEEP_ROW("FR_ln", "(s32,u16 16,u16 16)", "0.01", "256", N, "0.0039", st, "r16 in/out"); + } + /* FR_log10 */ + { + stats_t st; stats_reset(&st); + const int N = 65536; + for (int i = 1; i <= N; i++) { + double xd = 0.01 + (256.0 - 0.01) * i / (double)N; + s32 fr = tofix(xd, 16); + double actual_xd = frd(fr, 16); + s32 r = FR_log10(fr, 16, 16); + stats_add(&st, actual_xd, frd(r, 16), q16(log10(actual_xd)), log10(32000.0)); + } + SWEEP_ROW("FR_log10", "(s32,u16 16,u16 16)", "0.01", "256", N, "0.0039", st, "r16 in/out"); + } + /* FR_pow2 */ + { + stats_t st; stats_reset(&st); + const int N = 65536; + for (int i = 0; i < N; i++) { + double xd = -8.0 + 16.0 * i / (double)N; + s32 fr = tofix(xd, 16); + double actual_xd = frd(fr, 16); + s32 r = FR_pow2(fr, 16); + stats_add(&st, actual_xd, frd(r, 16), q16(pow(2.0, actual_xd)), pow(2.0, 8.0)); + } + SWEEP_ROW("FR_pow2", "(s32,u16 16)", "-8.0", "+8.0", N, "2.44e-4", st, "r16 in/out"); + } + /* FR_EXP (macro wrapping FR_pow2) */ + { + stats_t st; stats_reset(&st); 
+ const int N = 65536; + for (int i = 0; i < N; i++) { + double xd = -5.0 + 15.0 * i / (double)N; + s32 fr = tofix(xd, 16); + double actual_xd = frd(fr, 16); + s32 r = FR_EXP(fr, 16); + stats_add(&st, actual_xd, frd(r, 16), q16(exp(actual_xd)), 32000.0); + } + SWEEP_ROW("FR_EXP", "(s32,u16 16)", "-5.0", "+10.0", N, "2.29e-4", st, "macro, wraps FR_pow2"); + } + /* FR_POW10 (macro wrapping FR_pow2) */ + { + stats_t st; stats_reset(&st); + const int N = 65536; + for (int i = 0; i < N; i++) { + double xd = -2.0 + 6.0 * i / (double)N; + s32 fr = tofix(xd, 16); + double actual_xd = frd(fr, 16); + s32 r = FR_POW10(fr, 16); + stats_add(&st, actual_xd, frd(r, 16), q16(pow(10.0, actual_xd)), 32000.0); + } + SWEEP_ROW("FR_POW10", "(s32,u16 16)", "-2.0", "+4.0", N, "9.15e-5", st, "macro, wraps FR_pow2"); + } + + #undef SWEEP_ROW + printf("\n"); } int main(void) { @@ -2362,4 +2805,4 @@ int main(void) { section_accuracy_table(); return 0; -} +} \ No newline at end of file diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 0000000..29d8ac7 --- /dev/null +++ b/tools/README.md @@ -0,0 +1,131 @@ +# FR_Math Tools + +Diagnostic and code-generation utilities for the FR_Math library. + +## trig_neighborhood + +Sweep any math function over a range and print a neighborhood table showing +raw output, expected reference, absolute error, and percent error. + +**Build:** `make tools` + +**Usage:** +``` +trig_neighborhood
    <func> <center> <half> [options] +``` + +### Supported functions (25) + +| Category | Functions | +|---|---| +| Trig (degrees) | `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam`, `fr_sin`, `fr_cos`, `fr_tan`, `FR_SinI`, `FR_CosI`, `FR_TanI`, `fr_sin_deg`, `fr_cos_deg`, `fr_tan_deg` | +| Inverse trig | `FR_acos`, `FR_asin`, `FR_atan`, `FR_atan2` | +| Logarithmic | `FR_log2`, `FR_ln`, `FR_log10` | +| Exponential | `FR_pow2`, `FR_EXP`, `FR_POW10` | +| Other | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` | + +### Options + +| Option | Description | Default | +|---|---|---| +| `--inc <val>` | Increment per sample | function-dependent | +| `--fmt md\|csv\|ascii` | Output format | `md` | +| `--radix <n>` | Input radix for fixed-point | 16 | +| `--out_radix <n>` | Output radix (inv trig, log) | 16 | +| `--y <val>` | Fixed y for hypot functions | 0.0 | + +### Default increments + +- Trig + FR_atan2: `360/65536` (~0.0055 degrees) +- FR_acos, FR_asin: `1/32768` (~3.05e-5) +- All others: `1/65536` (~1.53e-5) + +### Examples + +```bash +# Cosine near -90 degrees +build/trig_neighborhood fr_cos -90 15 + +# Sine sweep in CSV format +build/trig_neighborhood fr_sin -360 10 --fmt csv + +# Tangent near pole +build/trig_neighborhood fr_tan 89.5 20 --inc 0.01 + +# Arcsine near zero +build/trig_neighborhood FR_asin 0.0001 15 --inc 3.05e-5 --radix 15 + +# Log2 near 1.0 +build/trig_neighborhood FR_log2 1.0 15 --inc 0.01 + +# Atan2 near 90 degrees +build/trig_neighborhood FR_atan2 90 15 + +# Hypot with y=50 +build/trig_neighborhood FR_hypot_fast8 100 15 --y 50 --radix 8 +``` + +--- + +## coef-gen.py + +Python script for generating power-of-two coefficient approximations. Given a +target floating-point value, searches for combinations of `+/- 2^(-k)` terms +that best approximate the value using only bit-shifts and adds. + +**Usage:** `python3 tools/coef-gen.py` + +--- + +## fr_coef-gen.cpp + +C++ coefficient generator for 32-bit host.
Similar purpose to `coef-gen.py` +but runs natively and can be used for brute-force search over larger term +counts. + +**Build:** `g++ -O2 tools/fr_coef-gen.cpp -o build/fr_coef-gen` + +--- + +## gen_pow2_table.py + +Generates the `gFR_POW2_FRAC_TAB[65]` lookup table used by `FR_pow2()`. +Output is a C array suitable for inclusion in FR_math.c. + +**Usage:** `python3 tools/gen_pow2_table.py` + +--- + +## gen_radix28_constants.py + +Generates radix-28 constants used by FR_EXP, FR_ln, FR_log10 for base +conversion (e.g., `FR_kLOG2E_28`, `FR_kLOG2_10_28`). + +**Usage:** `python3 tools/gen_radix28_constants.py` + +--- + +## check_published_versions.sh + +Verifies that published version tags match the version defined in +`FR_math.h` (`FR_MATH_VERSION_HEX`). Used in CI/release workflows. + +**Usage:** `bash tools/check_published_versions.sh` + +--- + +## make_release.sh + +Release automation script. Bumps version, tags, and prepares release +artifacts. + +**Usage:** `bash tools/make_release.sh` + +--- + +## interp_analysis.html + +Interactive HTML/JS visualization for interpolation analysis. Open in a +browser to explore interpolation error characteristics. + +**Usage:** Open `tools/interp_analysis.html` in a web browser. diff --git a/tools/trig_neighborhood.cpp b/tools/trig_neighborhood.cpp new file mode 100644 index 0000000..3266515 --- /dev/null +++ b/tools/trig_neighborhood.cpp @@ -0,0 +1,519 @@ +/* + * trig_neighborhood.cpp — sweep any math function over a range, print neighborhood table + * + * Usage: + * trig_neighborhood
    <func> <center> <half> [--inc <val>] [--fmt md|csv|ascii]
+ *       [--radix <n>] [--out_radix <n>] [--y <val>]
+ *
+ * Trig functions:
+ *   fr_sin_bam, fr_cos_bam, fr_tan_bam,
+ *   fr_sin, fr_cos, fr_tan,
+ *   FR_SinI, FR_CosI, FR_TanI,
+ *   fr_sin_deg, fr_cos_deg, fr_tan_deg
+ *
+ * Inverse trig:
+ *   FR_acos, FR_asin, FR_atan, FR_atan2
+ *
+ * Logarithmic:
+ *   FR_log2, FR_ln, FR_log10
+ *
+ * Exponential:
+ *   FR_pow2, FR_EXP, FR_POW10
+ *
+ * Other:
+ *   FR_sqrt, FR_hypot, FR_hypot_fast8
+ *
+ * center: center value (degrees for trig/atan2, input value for others)
+ * half: number of samples on each side of center
+ * --inc: increment (default depends on function type)
+ * --fmt: output format: md (default), csv, ascii
+ * --radix: input radix for fixed-point functions (default: 16)
+ * --out_radix: output radix for inverse trig and log (default: 16)
+ * --y: fixed y value for FR_hypot / FR_hypot_fast8 (default: 0.0)
+ *
+ * Examples:
+ *   trig_neighborhood fr_cos -90 15
+ *   trig_neighborhood fr_sin -360 10 --fmt csv
+ *   trig_neighborhood fr_tan 89.5 20 --inc 0.01
+ *   trig_neighborhood fr_sin_deg 45 10 --radix 8
+ *   trig_neighborhood FR_asin 0.5 15 --radix 15 --out_radix 16
+ *   trig_neighborhood FR_log2 1.0 15 --inc 0.01
+ *   trig_neighborhood FR_atan2 90 15
+ *   trig_neighborhood FR_hypot_fast8 100 15 --y 50 --radix 8
+ *
+ * Build:
+ *   make tools
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include "FR_math.h"
+
+#ifndef M_PI
+#define M_PI 3.14159265358979323846
+#endif
+
+/* Fixed-point value v at radix p -> double. */
+static double frd(s32 v, int p) { return (double)v / ldexp(1.0, p); }
+/* Round a double to the nearest multiple of 2^-p (the radix-p grid). */
+static double qN(double v, int p) { double s = ldexp(1.0, p); return floor(v * s + 0.5) / s; }
+/* Round-to-nearest float→fixed conversion (not truncation) */
+static s32 tofix(double v, int p) { return (s32)floor(ldexp(v, p) + 0.5); }
+/* Largest magnitude tan_ref() returns: 0x7fffffff interpreted at radix 16. */
+static const double TAN_CLAMP = (double)0x7fffffff / 65536.0;
+/* tan(rad), clamped to [-TAN_CLAMP, +TAN_CLAMP]. */
+static double tan_ref(double rad) {
+    double t = tan(rad);
+    if (t > TAN_CLAMP) return TAN_CLAMP;
+    if (t < -TAN_CLAMP) return -TAN_CLAMP;
+    return t;
+}
+
+/*
+ * Convert degrees to a 16-bit BAM angle (65536 units per 360 degrees),
+ * rounding to nearest and wrapping modulo one turn.
+ *
+ * floor(x + 0.5) is used instead of (int)(x + 0.5): the cast truncates
+ * toward zero, so every negative angle came out one BAM unit too high
+ * (e.g. -90 deg -> 0xC001 instead of 0xC000).
+ */
+static u16 deg_to_bam(double deg) {
+    return (u16)((s32)floor(deg * 65536.0 / 360.0 + 0.5) & 0xFFFF);
+}
+
+/*
+ * Function selector. The twelve trig entries come first and in this order;
+ * is_trig() depends on that (it tests f <= F_TAN_DEG).
+ */
+enum Func {
+    F_SIN_BAM, F_COS_BAM, F_TAN_BAM,
+    F_SIN, F_COS, F_TAN,
+    F_SINI, F_COSI, F_TANI,
+    F_SIN_DEG, F_COS_DEG, F_TAN_DEG,
+    F_ACOS, F_ASIN, F_ATAN, F_ATAN2,
+    F_LOG2, F_LN, F_LOG10,
+    F_POW2, F_EXP, F_POW10,
+    F_SQRT, F_HYPOT, F_HYPOT_FAST8,
+    F_UNKNOWN
+};
+
+/* Output table format. */
+enum Fmt { FMT_MD, FMT_CSV, FMT_ASCII };
+
+/* Map a command-line function name (case-sensitive) to its selector;
+ * returns F_UNKNOWN when the name is not recognized. */
+static Func parse_func(const char *s) {
+    if (!strcmp(s, "fr_sin_bam")) return F_SIN_BAM;
+    if (!strcmp(s, "fr_cos_bam")) return F_COS_BAM;
+    if (!strcmp(s, "fr_tan_bam")) return F_TAN_BAM;
+    if (!strcmp(s, "fr_sin")) return F_SIN;
+    if (!strcmp(s, "fr_cos")) return F_COS;
+    if (!strcmp(s, "fr_tan")) return F_TAN;
+    if (!strcmp(s, "FR_SinI")) return F_SINI;
+    if (!strcmp(s, "FR_CosI")) return F_COSI;
+    if (!strcmp(s, "FR_TanI")) return F_TANI;
+    if (!strcmp(s, "fr_sin_deg")) return F_SIN_DEG;
+    if (!strcmp(s, "fr_cos_deg")) return F_COS_DEG;
+    if (!strcmp(s, "fr_tan_deg")) return F_TAN_DEG;
+    if (!strcmp(s, "FR_acos")) return F_ACOS;
+    if (!strcmp(s, "FR_asin")) return F_ASIN;
+    if (!strcmp(s, "FR_atan")) return F_ATAN;
+    if (!strcmp(s, "FR_atan2")) return F_ATAN2;
+    if (!strcmp(s, "FR_log2")) return F_LOG2;
+    if (!strcmp(s, "FR_ln")) return F_LN;
+    if (!strcmp(s, "FR_log10")) return F_LOG10;
+    if (!strcmp(s, "FR_pow2")) return F_POW2;
+    if (!strcmp(s, "FR_EXP")) return F_EXP;
+    if (!strcmp(s, "FR_POW10")) return F_POW10;
+    if (!strcmp(s, "FR_sqrt")) return F_SQRT;
+    if (!strcmp(s, "FR_hypot")) return F_HYPOT;
+    if (!strcmp(s, "FR_hypot_fast8")) return F_HYPOT_FAST8;
+    return F_UNKNOWN;
+}
+
+/* Inverse of parse_func(): selector -> display name ("?" for anything else). */
+static const char *func_name(Func f) {
+    switch (f) {
+    case F_SIN_BAM: return "fr_sin_bam";
+    case F_COS_BAM: return "fr_cos_bam";
+    case F_TAN_BAM: return "fr_tan_bam";
+    case F_SIN: return "fr_sin";
+    case F_COS: return "fr_cos";
+    case F_TAN: return "fr_tan";
+    case F_SINI: return "FR_SinI";
+    case F_COSI: return "FR_CosI";
+    case F_TANI: return "FR_TanI";
+    case F_SIN_DEG: return "fr_sin_deg";
+    case F_COS_DEG: return "fr_cos_deg";
+    case F_TAN_DEG: return "fr_tan_deg";
+    case F_ACOS: return "FR_acos";
+    case F_ASIN: return "FR_asin";
+    case F_ATAN: return "FR_atan";
+    case F_ATAN2: return "FR_atan2";
+    case F_LOG2: return "FR_log2";
+    case F_LN: return "FR_ln";
+    case F_LOG10: return "FR_log10";
+    case F_POW2: return "FR_pow2";
+    case F_EXP: return "FR_EXP";
+    case F_POW10: return "FR_POW10";
+    case F_SQRT: return "FR_sqrt";
+    case F_HYPOT: return "FR_hypot";
+    case F_HYPOT_FAST8: return "FR_hypot_fast8";
+    default: return "?";
+    }
+}
+
+/* Classification helpers used by eval() to pick the reference function. */
+static int is_sin(Func f) { return f == F_SIN_BAM || f == F_SIN || f == F_SINI || f == F_SIN_DEG; }
+static int is_cos(Func f) { return f == F_COS_BAM || f == F_COS || f == F_COSI || f == F_COS_DEG; }
+/* Relies on the trig selectors being the first entries of enum Func. */
+static int is_trig(Func f) { return f <= F_TAN_DEG; }
+
+/*
+ * Evaluate function. Returns raw s32 result and sets input_fp, expected, out_prec.
+ *
+ *   f          function selector
+ *   val        sweep value (degrees for trig and FR_atan2, plain value otherwise)
+ *   radix      input radix for the fixed-point argument
+ *   out_radix  output radix (inverse trig and log functions only)
+ *   y_val      fixed y operand for FR_hypot / FR_hypot_fast8
+ *   input_fp   out: the fixed-point (or BAM / integer-degree) input actually passed
+ *   expected   out: double-precision reference, rounded to the output grid via qN()
+ *   out_prec   out: radix of the returned raw value
+ *
+ * The reference is computed from `val` as requested, not from the quantized
+ * input, except for FR_atan2 where it is computed from the quantized (y, x).
+ * FR_SinI / FR_CosI / FR_TanI receive (int)val, i.e. val truncated toward zero.
+ */
+static s32 eval(Func f, double val, int radix, int out_radix,
+                double y_val, s32 *input_fp, double *expected, int *out_prec)
+{
+    s32 raw = 0;
+
+    /* --- Trig functions (val = degrees) --- */
+    if (is_trig(f)) {
+        double rad = val * M_PI / 180.0;
+        *out_prec = 16;
+
+        if (is_sin(f)) *expected = qN(sin(rad), 16);
+        else if (is_cos(f)) *expected = qN(cos(rad), 16);
+        else *expected = qN(tan_ref(rad), 16);
+
+        switch (f) {
+        case F_SIN_BAM: {
+            u16 bam = deg_to_bam(val);
+            *input_fp = (s32)bam;
+            raw = fr_sin_bam(bam);
+            break;
+        }
+        case F_COS_BAM: {
+            u16 bam = deg_to_bam(val);
+            *input_fp = (s32)bam;
+            raw = fr_cos_bam(bam);
+            break;
+        }
+        case F_TAN_BAM: {
+            u16 bam = deg_to_bam(val);
+            *input_fp = (s32)bam;
+            raw = fr_tan_bam(bam);
+            break;
+        }
+        case F_SIN: {
+            s32 rad_fp = tofix(rad, radix);
+            *input_fp = rad_fp;
+            raw = fr_sin(rad_fp, (u16)radix);
+            break;
+        }
+        case F_COS: {
+            s32 rad_fp = tofix(rad, radix);
+            *input_fp = rad_fp;
+            raw = fr_cos(rad_fp, (u16)radix);
+            break;
+        }
+        case F_TAN: {
+            s32 rad_fp = tofix(rad, radix);
+            *input_fp = rad_fp;
+            raw = fr_tan(rad_fp, (u16)radix);
+            break;
+        }
+        case F_SINI:
+            *input_fp = (s32)(int)val;
+            raw = FR_SinI((int)val);
+            break;
+        case F_COSI:
+            *input_fp = (s32)(int)val;
+            raw = FR_CosI((int)val);
+            break;
+        case F_TANI:
+            *input_fp = (s32)(int)val;
+            raw = FR_TanI((s16)(int)val);
+            break;
+        case F_SIN_DEG: {
+            s32 deg_fp = tofix(val, radix);
+            *input_fp = deg_fp;
+            raw = fr_sin_deg(deg_fp, (u16)radix);
+            break;
+        }
+        case F_COS_DEG: {
+            s32 deg_fp = tofix(val, radix);
+            *input_fp = deg_fp;
+            raw = fr_cos_deg(deg_fp, (u16)radix);
+            break;
+        }
+        case F_TAN_DEG: {
+            s32 deg_fp = tofix(val, radix);
+            *input_fp = deg_fp;
+            raw = fr_tan_deg(deg_fp, (u16)radix);
+            break;
+        }
+        default:
+            break;
+        }
+        return raw;
+    }
+
+    /* --- Inverse trig (val = input value, not degrees) --- */
+    if (f == F_ACOS || f == F_ASIN || f == F_ATAN) {
+        *out_prec = out_radix;
+        s32 inp = tofix(val, radix);
+        *input_fp = inp;
+
+        switch (f) {
+        case F_ACOS:
+            raw = FR_acos(inp, (u16)radix, (u16)out_radix);
+            *expected = qN(acos(val), out_radix);
+            break;
+        case F_ASIN:
+            raw = FR_asin(inp, (u16)radix, (u16)out_radix);
+            *expected = qN(asin(val), out_radix);
+            break;
+        case F_ATAN:
+            raw = FR_atan(inp, (u16)radix, (u16)out_radix);
+            *expected = qN(atan(val), out_radix);
+            break;
+        default:
+            break;
+        }
+        return raw;
+    }
+
+    /* --- FR_atan2 (val = degrees on unit circle) --- */
+    if (f == F_ATAN2) {
+        *out_prec = out_radix;
+        double rad = val * M_PI / 180.0;
+        /* (x, y) is the unit-circle point at radix 15, independent of --radix */
+        s32 x = tofix(cos(rad), 15);
+        s32 y = tofix(sin(rad), 15);
+        *input_fp = tofix(val, radix);
+        raw = FR_atan2(y, x, (u16)out_radix);
+        /* reference uses the quantized (y, x) actually passed in */
+        double ref = atan2((double)y, (double)x);
+        *expected = qN(ref, out_radix);
+        return raw;
+    }
+
+    /* --- Log functions (val = input value) --- */
+    if (f == F_LOG2 || f == F_LN || f == F_LOG10) {
+        *out_prec = out_radix;
+        s32 inp = tofix(val, radix);
+        *input_fp = inp;
+
+        switch (f) {
+        case F_LOG2:
+            raw = FR_log2(inp, (u16)radix, (u16)out_radix);
+            *expected = (val > 0.0) ? qN(log2(val), out_radix) : 0.0;
+            break;
+        case F_LN:
+            raw = FR_ln(inp, (u16)radix, (u16)out_radix);
+            *expected = (val > 0.0) ? qN(log(val), out_radix) : 0.0;
+            break;
+        case F_LOG10:
+            raw = FR_log10(inp, (u16)radix, (u16)out_radix);
+            *expected = (val > 0.0) ? qN(log10(val), out_radix) : 0.0;
+            break;
+        default:
+            break;
+        }
+        return raw;
+    }
+
+    /* --- Power/exp functions (val = exponent) --- */
+    if (f == F_POW2 || f == F_EXP || f == F_POW10) {
+        *out_prec = radix;
+        s32 inp = tofix(val, radix);
+        *input_fp = inp;
+
+        switch (f) {
+        case F_POW2:
+            raw = FR_pow2(inp, (u16)radix);
+            *expected = qN(pow(2.0, val), radix);
+            break;
+        case F_EXP:
+            raw = FR_EXP(inp, (u16)radix);
+            *expected = qN(exp(val), radix);
+            break;
+        case F_POW10:
+            raw = FR_POW10(inp, (u16)radix);
+            *expected = qN(pow(10.0, val), radix);
+            break;
+        default:
+            break;
+        }
+        return raw;
+    }
+
+    /* --- FR_sqrt (val = input value) --- */
+    if (f == F_SQRT) {
+        *out_prec = radix;
+        s32 inp = tofix(val, radix);
+        *input_fp = inp;
+        raw = FR_sqrt(inp, (u16)radix);
+        *expected = (val >= 0.0) ? qN(sqrt(val), radix) : 0.0;
+        return raw;
+    }
+
+    /* --- FR_hypot / FR_hypot_fast8 (val = x, y_val = y) --- */
+    if (f == F_HYPOT || f == F_HYPOT_FAST8) {
+        *out_prec = radix;
+        s32 x_fp = tofix(val, radix);
+        s32 y_fp = tofix(y_val, radix);
+        *input_fp = x_fp;
+
+        if (f == F_HYPOT)
+            raw = FR_hypot(x_fp, y_fp, (u16)radix);
+        else
+            raw = FR_hypot_fast8(x_fp, y_fp);
+
+        *expected = qN(hypot(val, y_val), radix);
+        return raw;
+    }
+
+    /* fallback */
+    *input_fp = 0;
+    *expected = 0.0;
+    *out_prec = 16;
+    return 0;
+}
+
+/* Smart default increment based on function type */
+static double default_inc(Func f) {
+    if (is_trig(f) || f == F_ATAN2)
+        return 360.0 / 65536.0; /* ~0.0055 degrees */
+    if (f == F_ACOS || f == F_ASIN)
+        return 1.0 / 32768.0; /* ~3.05e-5, matches r15 LSB */
+    return 1.0 / 65536.0; /* ~1.53e-5, matches r16 LSB */
+}
+
+/* Print command-line help to stderr. */
+static void usage(void) {
+    fprintf(stderr,
+        "Usage: trig_neighborhood <func> <center> <half> [options]\n"
+        "\n"
+        "Supported functions:\n"
+        "\n"
+        " Trig (input: degrees):\n"
+        " fr_sin_bam, fr_cos_bam, fr_tan_bam\n"
+        " fr_sin, fr_cos, fr_tan\n"
+        " FR_SinI, FR_CosI, FR_TanI\n"
+        " fr_sin_deg, fr_cos_deg, fr_tan_deg\n"
+        "\n"
+        " Inverse trig (input: value):\n"
+        " FR_acos, FR_asin, FR_atan\n"
+        "\n"
+        " Inverse trig (input: degrees on unit circle):\n"
+        " FR_atan2\n"
+        "\n"
+        " Logarithmic (input: value):\n"
+        " FR_log2, FR_ln, FR_log10\n"
+        "\n"
+        " Exponential (input: exponent):\n"
+        " FR_pow2, FR_EXP, FR_POW10\n"
+        "\n"
+        " Other:\n"
+        " FR_sqrt (input: value)\n"
+        " FR_hypot, FR_hypot_fast8 (input: x, --y for y)\n"
+        "\n"
+        " center: center of sweep (degrees for trig/atan2, value otherwise)\n"
+        " half: number of samples each side of center\n"
+        "\n"
+        "Options:\n"
+        " --inc <val> increment (default depends on function)\n"
+        " --fmt md|csv|ascii output format (default: md)\n"
+        " --radix <n> input radix for fixed-point (default: 16)\n"
+        " --out_radix <n> output radix for inv trig/log (default: 16)\n"
+        " --y <val> fixed y value for hypot functions (default: 0.0)\n"
+        "\n"
+        "Examples:\n"
+        " trig_neighborhood fr_cos -90 15\n"
+        " trig_neighborhood fr_sin -360 10 --fmt csv\n"
+        " trig_neighborhood fr_tan 89.5 20 --inc 0.01\n"
+        " trig_neighborhood fr_sin_deg 45 10 --radix 8\n"
+        " trig_neighborhood FR_asin 0.5 15 --radix 15 --out_radix 16\n"
+        " trig_neighborhood FR_log2 1.0 15 --inc 0.01\n"
+        " trig_neighborhood FR_atan2 90 15\n"
+        " trig_neighborhood FR_hypot_fast8 100 15 --y 50 --radix 8\n"
+    );
+}
+
+int main(int argc, char **argv) {
+    if (argc < 4) { usage(); return 1; }
+
+    Func func = parse_func(argv[1]);
+    if (func == F_UNKNOWN) {
+        fprintf(stderr, "Unknown function: %s\n", argv[1]);
+        usage();
+        return 1;
+    }
+
+    double center = atof(argv[2]);
+    int half = atoi(argv[3]);
+    double inc = -1.0; /* sentinel: use default */
+    Fmt fmt = FMT_MD;
+    int radix = 16;
+    int out_radix = 16;
+    double y_val = 0.0;
+
+    for (int i = 4; i < argc; i++) {
+        if 
(!strcmp(argv[i], "--inc") && i + 1 < argc) + inc = atof(argv[++i]); + else if (!strcmp(argv[i], "--fmt") && i + 1 < argc) { + i++; + if (!strcmp(argv[i], "csv")) fmt = FMT_CSV; + else if (!strcmp(argv[i], "ascii")) fmt = FMT_ASCII; + else fmt = FMT_MD; + } + else if (!strcmp(argv[i], "--radix") && i + 1 < argc) + radix = atoi(argv[++i]); + else if (!strcmp(argv[i], "--out_radix") && i + 1 < argc) + out_radix = atoi(argv[++i]); + else if (!strcmp(argv[i], "--y") && i + 1 < argc) + y_val = atof(argv[++i]); + } + + if (inc < 0.0) inc = default_inc(func); + + const char *cols[] = {"sample", "val", "input_fp", "radix", "raw_got", "raw_exp", "expected", "got", "abs_err", "pct_err"}; + int ncols = 10; + + switch (fmt) { + case FMT_CSV: + for (int c = 0; c < ncols; c++) + printf("%s%s", cols[c], c < ncols - 1 ? "," : "\n"); + break; + case FMT_MD: + printf("**%s** center=%.6f, +/-%d samples, inc=%.6g, radix=%d", + func_name(func), center, half, inc, radix); + if (out_radix != radix) + printf(", out_radix=%d", out_radix); + if (func == F_HYPOT || func == F_HYPOT_FAST8) + printf(", y=%.6f", y_val); + printf("\n\n"); + printf("|"); + for (int c = 0; c < ncols; c++) printf(" %s |", cols[c]); + printf("\n|"); + for (int c = 0; c < ncols; c++) printf("---|"); + printf("\n"); + break; + case FMT_ASCII: + printf("# %s center=%.6f +/-%d inc=%.6g radix=%d", + func_name(func), center, half, inc, radix); + if (out_radix != radix) + printf(" out_radix=%d", out_radix); + if (func == F_HYPOT || func == F_HYPOT_FAST8) + printf(" y=%.6f", y_val); + printf("\n"); + printf("%8s %12s %12s %6s %10s %10s %12s %12s %12s %12s\n", + cols[0], cols[1], cols[2], cols[3], cols[4], cols[5], cols[6], cols[7], cols[8], cols[9]); + printf("%8s %12s %12s %6s %10s %10s %12s %12s %12s %12s\n", + "--------", "------------", "------------", "------", + "----------", "----------", + "------------", "------------", "------------", "------------"); + break; + } + + for (int k = -half; k <= half; k++) { + double 
val = center + k * inc; + s32 input_fp; + double expected; + int out_prec; + s32 raw = eval(func, val, radix, out_radix, y_val, &input_fp, &expected, &out_prec); + s32 raw_exp = (s32)floor(ldexp(expected, out_prec) + 0.5); + double got = frd(raw, out_prec); + double ae = fabs(got - expected); + double pe = (expected != 0.0) ? ae / fabs(expected) * 100.0 : (ae != 0.0 ? 100.0 : 0.0); + + switch (fmt) { + case FMT_CSV: + printf("%d,%.6g,%d,%d,%d,%d,%.6f,%.6f,%.6f,%.4f%%\n", + k, val, input_fp, radix, raw, raw_exp, expected, got, ae, pe); + break; + case FMT_MD: + printf("| %d | %.6g | %d | %d | %d | %d | %.6f | %.6f | %.6f | %.4f%% |\n", + k, val, input_fp, radix, raw, raw_exp, expected, got, ae, pe); + break; + case FMT_ASCII: + printf("%8d %12.6g %12d %6d %10d %10d %12.6f %12.6f %12.6f %11.4f%%\n", + k, val, input_fp, radix, raw, raw_exp, expected, got, ae, pe); + break; + } + } + + return 0; +} From 44061aafdf5e75cb9b57961e354a59bc516ba266 Mon Sep 17 00:00:00 2001 From: deftio Date: Mon, 4 May 2026 09:16:52 -0700 Subject: [PATCH 4/7] v2.0.7 docs and prec updates --- .github/workflows/release.yml | 24 + README.md | 160 +-- VERSION | 2 +- agents.md | 3 +- compare_lfm/comparison_results.json | 92 +- compare_lfm/comparison_summary.md | 32 +- dev/misc/FR_math.c.checkpoint3 | 1705 +++++++++++++++++++++++++++ docs/README.md | 54 +- docs/building.md | 44 +- docs/examples.md | 2 +- docs/getting-started.md | 4 +- idf_component.yml | 2 +- library.json | 2 +- library.properties | 2 +- llms.txt | 5 +- makefile | 43 +- pages/assets/site.js | 2 +- pages/guide/building.html | 42 +- pages/guide/examples.html | 2 +- pages/guide/getting-started.html | 4 +- pages/index.html | 67 +- scripts/crossbuild-docker.sh | 123 -- scripts/crossbuild_sizes.sh | 290 +++++ scripts/size_report.sh | 142 --- scripts/update_sizes.sh | 158 --- src/FR_math.c | 394 +++++-- src/FR_math.h | 32 +- src/FR_math_2D.cpp | 2 +- src/FR_math_2D.h | 2 +- src/FR_tan32.c | 282 ----- src/FR_tan_table.h | 115 -- 
src/FR_trig_table.h | 106 -- tests/test_full_sweep.c | 346 ------ tests/test_pole_table.c | 92 -- tests/test_sweep_csv.c | 149 --- tests/test_tan32.c | 424 ------- tests/test_tan32_peaks.c | 198 ---- tests/test_tan32_sweep.c | 318 ----- tests/test_tdd.cpp | 2 +- tools/trig_neighborhood.cpp | 17 + 40 files changed, 2633 insertions(+), 2852 deletions(-) create mode 100644 dev/misc/FR_math.c.checkpoint3 delete mode 100755 scripts/crossbuild-docker.sh create mode 100755 scripts/crossbuild_sizes.sh delete mode 100755 scripts/size_report.sh delete mode 100755 scripts/update_sizes.sh delete mode 100644 src/FR_tan32.c delete mode 100644 src/FR_tan_table.h delete mode 100644 src/FR_trig_table.h delete mode 100644 tests/test_full_sweep.c delete mode 100644 tests/test_pole_table.c delete mode 100644 tests/test_sweep_csv.c delete mode 100644 tests/test_tan32.c delete mode 100644 tests/test_tan32_peaks.c delete mode 100644 tests/test_tan32_sweep.c diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c5765cd..bea9935 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -99,3 +99,27 @@ jobs: body_path: /tmp/release_notes.md draft: false prerelease: false + + publish-pio: + needs: release + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install PlatformIO + run: pip install platformio + - name: Publish to PlatformIO Registry + env: + PLATFORMIO_AUTH_TOKEN: ${{ secrets.PLATFORMIO_AUTH_TOKEN }} + run: pio pkg publish . 
--no-interactive + + publish-espressif: + needs: release + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install compote (ESP Component Manager) + run: pip install idf-component-manager + - name: Publish to Espressif Component Registry + env: + IDF_COMPONENT_API_TOKEN: ${{ secrets.IDF_COMPONENT_API_TOKEN }} + run: compote component upload --name fr_math --namespace deftio diff --git a/README.md b/README.md index 784535c..07c7ce5 100644 --- a/README.md +++ b/README.md @@ -2,26 +2,23 @@ [![CI](https://github.com/deftio/fr_math/actions/workflows/ci.yml/badge.svg)](https://github.com/deftio/fr_math/actions/workflows/ci.yml) [![Coverage](https://img.shields.io/badge/coverage-98%25-brightgreen.svg)](#building-and-testing) [![Docs](https://img.shields.io/badge/docs-online-blue.svg)](https://deftio.github.io/fr_math/) -[![Version](https://img.shields.io/badge/version-2.0.7-blue.svg)](release_notes.md) - +[![Version](https://img.shields.io/badge/version-2.0.8-blue.svg)](release_notes.md) + [![PlatformIO](https://img.shields.io/badge/PlatformIO-library-teal.svg)](https://registry.platformio.org/libraries/deftio/fr_math) [![Arduino](https://img.shields.io/badge/Arduino-library-teal.svg)](https://github.com/deftio/fr_math) [![ESP Component](https://img.shields.io/badge/ESP--IDF-component-teal.svg)](https://components.espressif.com/components/deftio/fr_math) - # FR_Math: A C Language Fixed-Point Math Library for Embedded Systems -FR_Math is a compact, integer-only fixed-point math library built for -systems where floating point is too slow, too big, or unavailable. Designed for embedded targets ranging from -legacy 16 MHz 68k processors to modern Cortex-M and RISC-V cores, it -provides a full suite of math primitives — trigonometry, logarithms, -roots, transforms, and signal generators — while remaining -deterministic, portable, and small. 
Unlike traditional fixed-point -libraries, FR_Math lets the caller choose the binary point per -operation, trading precision and range explicitly instead of locking -into a single format. Pure C (C99/C11/C17) with an optional C++ -2D-transform wrapper. Compiles under Arduino. Zero dependencies -beyond `<stdint.h>`. +See: **[Documentation & Guide](https://deftio.github.io/fr_math/)** — for API reference, examples, fixed-point primer, build instructions. + + +**FR_Math** is a compact, integer-only fixed-point math library built for systems where floating point is too slow, too big, or unavailable. Designed for embedded targets ranging from legacy 16 MHz 68k processors to modern Cortex-M and RISC-V cores, it provides a full suite of math primitives — trigonometry, logarithms, roots, transforms, and signal generators — while remaining deterministic, portable, and small. Optional print utility functions are also provided for pretty printing out fixed point numbers over serial links or buffers. + +Unlike most fixed-point libraries, FR_Math lets the caller choose the binary point (radix) per operation, trading precision and range explicitly instead of locking into a single format. FR_Math is pure C (C99/C11/C17) with C++ wrappers. +Compiles under Arduino, PlatformIO, Espressif, and many older embedded targets. +Zero dependencies beyond `<stdint.h>`. + ### Measured accuracy @@ -29,49 +26,46 @@ Errors below are measured at Q16.16 (s15.16). All functions accept any radix — Q16.16 is just the reference point for the table. At other radixes (3-bit, 24-bit, etc.) accuracy will differ due to the -number of fractional bits available. All functions support radix 0 to 30.
- - -| Function | Max err (%)*| Avg err (%) | Peak at | Note | -|---|---:|---:|---:|---| -| sin/cos (BAM) | 0.4578 | 0.0076 | 94 | fr_sin_bam/fr_cos_bam direct; 129-entry table | -| sin/cos (deg) | 0.4578 | 0.0076 | -359.5 | FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM | -| sin/cos (rad) | 0.6104 | 0.0085 | -4.721 | fr_sin/fr_cos via fr_rad_to_bam ±2π r16 | -| tan (BAM) | 0.5823 | 0.0008 | 16360 | fr_tan_bam 65536-pt full; ±maxint at poles | -| tan (deg) | 0.5311 | 0.0008 | -270.1 | FR_Tan ±360° s15.16 full; sat at poles | -| tan (rad) | 13.4069 | 0.0029 | -4.713 | fr_tan ±2π r16 full; sat at poles | -| asin / acos | 0.8743 | 0.0301 | 0.0123 | 65536-pt; sqrt approx near boundary | -| atan2 | 0.5100 | 0.0237 | -2.571 | 65536x5 radii; asin/acos+hypot_fast8 | -| atan | 0.3390 | 0.0154 | -0.018 | 20001-pt full sweep [-10,10]; via FR_atan2 | -| sqrt | 0.0239 | 0.0000 | 0.0001 | Round-to-nearest | -| log2 | 0.0286 | 0.0029 | 0.895 | 65-entry mantissa table | -| pow2 | 0.0019 | 0.0003 | 3.36 | 65-entry fraction table | -| ln, log10 | 0.0004 | 0.0000 | 50 | Via FR_MULK28 from log2 | -| exp | 0.0003 | 0.0000 | 3.91 | FR_MULK28 + FR_pow2 | -| exp_fast | 0.0009 | 0.0001 | 3.92 | Shift-only scaling | -| pow10 | 0.0007 | 0.0000 | 1.97 | FR_MULK28 + FR_pow2 | -| pow10_fast | 0.0028 | 0.0002 | 1.99 | Shift-only scaling | -| hypot (exact) | 0.0000 | 0.0000 | 0 | 64-bit intermediate | -| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | 1000 | Shift-only, no multiply | - -*Relative error; reference clamped to 1% of full-scale output. "Peak at" = input that produced max error. - - +number of fractional bits available. 
+ +| Function | Max err (%)* | Avg err (%) | Note | +| --- | --- | --- | --- | +| sin/cos (BAM) | 0.1526 | 0.0030 | fr_sin_bam/fr_cos_bam direct; 129-entry table | +| sin/cos (deg) | 0.1526 | 0.0029 | FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM | +| sin/cos (rad) | 0.1828 | 0.0033 | fr_sin/fr_cos via fr_rad_to_bam ±2π r16 | +| tan (BAM) | 0.5823 | 0.0008 | fr_tan_bam 65536-pt full; ±maxint at poles | +| tan (deg) | 0.5311 | 0.0008 | fr_tan_deg ±360° s15.16 full; sat at poles | +| tan (rad) | 0.0386 | 0.0001 | fr_tan ±2π r16; r24 pole bypass | +| asin / acos | 0.7771 | 0.0280 | 65536-pt; sqrt approx near boundary | +| atan2 | 0.2564 | 0.0237 | 65536x5 radii; asin/acos+hypot_fast8 | +| atan | 0.2425 | 0.0155 | 20001-pt full sweep [-10,10]; via FR_atan2 | +| sqrt | 0.0000 | 0.0000 | Round-to-nearest | +| log2 | 0.0116 | 0.0016 | 65-entry mantissa table | +| pow2 | 0.0018 | 0.0004 | 65-entry fraction table | +| ln, log10 | 0.0004 | 0.0000 | Via FR_MULK28 from log2 | +| exp | 0.0003 | 0.0000 | FR_MULK28 + FR_pow2 | +| exp_fast | 0.0009 | 0.0001 | Shift-only scaling | +| pow10 | 0.0005 | 0.0000 | FR_MULK28 + FR_pow2 | +| pow10_fast | 0.0022 | 0.0002 | Shift-only scaling | +| hypot (exact) | 0.0000 | 0.0000 | 64-bit intermediate | +| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply | + + ### What's in the box | Area | Functions | -|---|---| -| Arithmetic | `FR_ADD`, `FR_SUB`, `FR_DIV`, `FR_DIV32`, `FR_MOD`, `FR_FixMuls`, `FR_FixMulSat`, `FR_CHRDX` | -| Utility | `FR_MIN`, `FR_MAX`, `FR_CLAMP`, `FR_ABS`, `FR_SGN` | -| Trig (degree) | `fr_sin_deg`, `fr_cos_deg`, `fr_tan_deg`, `FR_SinI`, `FR_CosI`, `FR_TanI` | -| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam` | -| Inverse trig | `FR_atan`, `FR_atan2`, `FR_asin`, `FR_acos` | -| Log / exp | `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`, `FR_EXP_FAST`, `FR_POW10_FAST`, `FR_MULK28` | -| Roots | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` | -| Wave generators | 
`fr_wave_sqr`, `fr_wave_pwm`, `fr_wave_tri`, `fr_wave_saw`, `fr_wave_tri_morph`, `fr_wave_noise` | -| Envelope | `fr_adsr_init`, `fr_adsr_trigger`, `fr_adsr_release`, `fr_adsr_step` | -| 2D transforms | `FR_Matrix2D_CPT` (mul, add, sub, det, inv, setrotate, XFormPtI, XFormPtI16) | -| Formatted output | `FR_printNumD`, `FR_printNumF`, `FR_printNumH`, `FR_numstr` | +| --- | --- | +| Arithmetic | FR_ADD, FR_SUB, FR_DIV, FR_DIV32, FR_MOD, FR_FixMuls, FR_FixMulSat, FR_CHRDX | +| Utility | FR_MIN, FR_MAX, FR_CLAMP, FR_ABS, FR_SGN | +| Trig (radian/BAM) | fr_sin, fr_cos, fr_tan, fr_sin_bam, fr_cos_bam, fr_tan_bam | +| Trig (degree) | fr_sin_deg, fr_cos_deg, fr_tan_deg, FR_SinI, FR_CosI, FR_TanI | +| Inverse trig | FR_atan, FR_atan2, FR_asin, FR_acos | +| Log / exp | FR_log2, FR_ln, FR_log10, FR_pow2, FR_EXP, FR_POW10, FR_EXP_FAST, FR_POW10_FAST, FR_MULK28 | +| Roots | FR_sqrt, FR_hypot, FR_hypot_fast8 | +| Wave generators | fr_wave_sqr, fr_wave_pwm, fr_wave_tri, fr_wave_saw, fr_wave_tri_morph, fr_wave_noise | +| Envelope | fr_adsr_init, fr_adsr_trigger, fr_adsr_release, fr_adsr_step | +| 2D transforms | FR_Matrix2D_CPT (mul, add, sub, det, inv, setrotate, XFormPtI, XFormPtI16) | +| Formatted output | FR_printNumD, FR_printNumF, FR_printNumH, FR_numstr | ### Library size (FR_math.c only, `-Os`) @@ -80,24 +74,28 @@ sizes may vary depending on optimization and linker settings. Sizes include all code and internal tables; everything is ROMable. 
-| Target | Core | Full | -|--------|-----:|-----:| -| RP2040 (Cortex-M0+) | 2.6 KB | 4.2 KB | -| STM32 (Cortex-M4) | 2.6 KB | 4.2 KB | -| RISC-V 32 (rv32imac) | 3.0 KB | 4.7 KB | -| ESP32 (Xtensa) | 3.5 KB | 5.2 KB | -| 68k | 3.5 KB | 5.3 KB | -| x86-64 (GCC) | 3.5 KB | 5.7 KB | -| x86-32 | 4.5 KB | 6.8 KB | -| MSP430 (16-bit) | 5.9 KB | 8.9 KB | -| 68HC11 | 10.8 KB | 16.0 KB | -| AVR (ATmega328P) | 7.0 KB | 10.6 KB | +| Target | Lean | Core | Full | +| --- | ---:| ---:| ---:| +| Cortex-M4 (STM32) | 3.3 KB | 4.4 KB | 5.5 KB | +| Cortex-M0 (RP2040) | 3.4 KB | 4.5 KB | 5.7 KB | +| RISC-V rv64 | 4.0 KB | 5.5 KB | 6.8 KB | +| RISC-V rv32 | 4.1 KB | 5.5 KB | 6.8 KB | +| Xtensa LX106 (ESP8266) | 4.2 KB | 5.8 KB | 7.3 KB | +| 68k | 4.4 KB | 6.2 KB | 7.8 KB | +| x86-64 (GCC) | 4.6 KB | 6.1 KB | 8.0 KB | +| AArch64 (ARM64) | 4.8 KB | 6.6 KB | 8.7 KB | +| x86-32 | 5.3 KB | 7.2 KB | 9.2 KB | +| MSP430 (16-bit) | 7.8 KB | 10.7 KB | 12.8 KB | +| AVR (ATmega328P) | 9.2 KB | 12.8 KB | 15.4 KB | +| 68HC11 | 13.3 KB | 18.4 KB | 22.6 KB | -Core = compiled with `-DFR_CORE_ONLY` (math only, no print, no waves). +Lean = `-DFR_LEAN -DFR_NO_PRINT` (radian trig, inv trig, log/exp, sqrt). +Core = `-DFR_CORE_ONLY` (+ degree trig, BAM tan, log10, hypot). +Full = all features (+ print, waves, ADSR). The optional 2D module adds ~1 KB. \* MSP430, 68HC11, and AVR are 8/16-bit — every 32-bit operation expands to multiple instructions. -See [`docker/`](docker/) for the cross-compile setup. +See [Building & Testing](docs/building.md) for the full cross-compile setup. ### Lean build options @@ -106,10 +104,10 @@ for ROM-constrained targets. 
Define them before including `FR_math.h` (or pass `-D` on the compiler command line): | Define | What it removes | Typical savings | -|---|---|---| -| `FR_CORE_ONLY` | Everything below (print + waves) | ~1.9 KB | -| `FR_NO_PRINT` | `FR_printNumF`, `FR_printNumD`, `FR_printNumH`, `FR_numstr` | ~1.3 KB | -| `FR_NO_WAVES` | `fr_wave_*` (6 shapes), `fr_adsr_*` (ADSR envelope), `FR_HZ2BAM_INC` | ~0.6 KB | +| --- | --- | --- | +| FR_CORE_ONLY | Everything below (print + waves) | ~1.9 KB | +| FR_NO_PRINT | FR_printNumF, FR_printNumD, FR_printNumH, FR_numstr | ~1.3 KB | +| FR_NO_WAVES | fr_wave_* (6 shapes), fr_adsr_* (ADSR envelope), FR_HZ2BAM_INC | ~0.6 KB | `FR_CORE_ONLY` is a convenience shorthand that defines both `FR_NO_PRINT` and `FR_NO_WAVES` in one step. @@ -134,7 +132,7 @@ make lib # build static library make test # run all tests (unit, TDD characterization, 2D) ``` -## Quick taste +## Example ```c #include "FR_math.h" @@ -216,23 +214,25 @@ The full docs ship in two forms — pick whichever fits how you read. **Terminal / editor (plain markdown):** - [docs/README.md](docs/README.md) — same content as plain markdown. - - [getting-started.md](docs/getting-started.md) | [fixed-point-primer.md](docs/fixed-point-primer.md) | [api-reference.md](docs/api-reference.md) - - [examples.md](docs/examples.md) | [building.md](docs/building.md) | [releases.md](docs/releases.md) +- [getting-started.md](docs/getting-started.md) | [fixed-point-primer.md](docs/fixed-point-primer.md) | [api-reference.md](docs/api-reference.md) +- [examples.md](docs/examples.md) | [building.md](docs/building.md) | [releases.md](docs/releases.md) ## History FR_Math has been in service since 2000, originally built for graphics transforms on 16 MHz 68k Palm Pilots. It shipped inside Trumpetsoft's *Inkstorm* on PalmOS, then moved forward through ARM, x86, MIPS, -RISC-V, and various 8/16-bit embedded targets. 
v2.0.7 is the current -release with a full test suite, bit-exact numerical specification, and -CI on every push. - +RISC-V, and various 8/16-bit embedded targets.  +The current release has a full test suite, a bit-exact numerical specification, +CI on every push, and better documentation. + ## License BSD-2-Clause — see [LICENSE.txt](LICENSE.txt). (c) 2000-2026 M. Chatterjee +PRs and suggestions are welcome.  Please be detailed, as embedded systems can involve many tradeoffs. + ## For AI coding agents - [llms.txt](llms.txt) — machine-readable API summary @@ -241,4 +241,4 @@ BSD-2-Clause — see [LICENSE.txt](LICENSE.txt). ## Version 2.0.7 — see [release_notes.md](release_notes.md) for the v1 → v2 -migration guide, numerical fixes, and new functionality. +migration guide, numerical fixes, and new functionality. \ No newline at end of file diff --git a/VERSION b/VERSION index f1547e6..815e68d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.7 +2.0.8 diff --git a/agents.md b/agents.md index 2a461f6..806064d 100644 --- a/agents.md +++ b/agents.md @@ -14,9 +14,8 @@ Integer-only, zero dependencies, caller-selectable radix (binary point).
``` src/ Core library (this is what ships) FR_math.h Public API — all macros, function declarations, constants - FR_math.c All function implementations + FR_math.c All function implementations (trig tables inlined) FR_defs.h Type aliases (s8, s16, s32, u8, u16, u32) - FR_trig_table.h Precomputed sine table FR_math_2D.h/.cpp Optional C++ 2D transform class tests/ Test suite (7 programs, run via `make test`) diff --git a/compare_lfm/comparison_results.json b/compare_lfm/comparison_results.json index b9c1b3b..76b25d9 100644 --- a/compare_lfm/comparison_results.json +++ b/compare_lfm/comparison_results.json @@ -13,9 +13,9 @@ "double_reference": "std::sin", "sweep": "65536-pt, [-pi, +pi]", "speed": { - "fr_math_ns_per_call": 5.6, - "libfixmath_ns_per_call": 10.6, - "fr_math_speedup": 1.91, + "fr_math_ns_per_call": 7.4, + "libfixmath_ns_per_call": 12.7, + "fr_math_speedup": 1.72, "faster": "fr_math" }, "accuracy_vs_double": { @@ -43,9 +43,9 @@ "double_reference": "std::cos", "sweep": "65536-pt, [-pi, +pi]", "speed": { - "fr_math_ns_per_call": 8.9, - "libfixmath_ns_per_call": 13.3, - "fr_math_speedup": 1.50, + "fr_math_ns_per_call": 9.9, + "libfixmath_ns_per_call": 13.2, + "fr_math_speedup": 1.34, "faster": "fr_math" }, "accuracy_vs_double": { @@ -73,19 +73,19 @@ "double_reference": "std::tan", "sweep": "65536-pt, [-1.2, 1.2] rad", "speed": { - "fr_math_ns_per_call": 7.1, - "libfixmath_ns_per_call": 32.6, - "fr_math_speedup": 4.57, + "fr_math_ns_per_call": 14.1, + "libfixmath_ns_per_call": 37.2, + "fr_math_speedup": 2.64, "faster": "fr_math" }, "accuracy_vs_double": { "fr_math": { - "max_abs_error": 1.98158306e-04, - "mean_abs_error": 3.37019908e-05, - "max_error_lsb": 13.0, - "mean_error_lsb": 2.2, - "max_rel_error_pct": 0.1551, - "mean_rel_error_pct": 0.0055 + "max_abs_error": 5.06554437e-04, + "mean_abs_error": 5.84009618e-05, + "max_error_lsb": 33.2, + "mean_error_lsb": 3.8, + "max_rel_error_pct": 0.5850, + "mean_rel_error_pct": 0.0122 }, "libfixmath": { 
"max_abs_error": 1.82495961e-02, @@ -104,9 +104,9 @@ "double_reference": "std::asin", "sweep": "65536-pt, [-0.999, 0.999]", "speed": { - "fr_math_ns_per_call": 9.7, - "libfixmath_ns_per_call": 49.5, - "fr_math_speedup": 5.11, + "fr_math_ns_per_call": 11.9, + "libfixmath_ns_per_call": 64.0, + "fr_math_speedup": 5.38, "faster": "fr_math" }, "accuracy_vs_double": { @@ -134,9 +134,9 @@ "double_reference": "std::acos", "sweep": "65536-pt, [-0.999, 0.999]", "speed": { - "fr_math_ns_per_call": 8.4, - "libfixmath_ns_per_call": 50.7, - "fr_math_speedup": 6.03, + "fr_math_ns_per_call": 11.1, + "libfixmath_ns_per_call": 65.0, + "fr_math_speedup": 5.88, "faster": "fr_math" }, "accuracy_vs_double": { @@ -164,9 +164,9 @@ "double_reference": "std::atan", "sweep": "65536-pt, [-50, 50]", "speed": { - "fr_math_ns_per_call": 8.1, - "libfixmath_ns_per_call": 11.0, - "fr_math_speedup": 1.37, + "fr_math_ns_per_call": 10.8, + "libfixmath_ns_per_call": 14.8, + "fr_math_speedup": 1.36, "faster": "fr_math" }, "accuracy_vs_double": { @@ -194,9 +194,9 @@ "double_reference": "std::atan2", "sweep": "65536-pt, 5 radii x 360 deg", "speed": { - "fr_math_ns_per_call": 15.9, - "libfixmath_ns_per_call": 10.9, - "fr_math_speedup": 0.69, + "fr_math_ns_per_call": 20.8, + "libfixmath_ns_per_call": 13.7, + "fr_math_speedup": 0.66, "faster": "libfixmath" }, "accuracy_vs_double": { @@ -225,9 +225,9 @@ "double_reference": "std::sqrt", "sweep": "65536-pt, [0.01, 100]", "speed": { - "fr_math_ns_per_call": 18.6, - "libfixmath_ns_per_call": 19.9, - "fr_math_speedup": 1.07, + "fr_math_ns_per_call": 24.8, + "libfixmath_ns_per_call": 26.2, + "fr_math_speedup": 1.05, "faster": "fr_math" }, "accuracy_vs_double": { @@ -255,9 +255,9 @@ "double_reference": "std::exp", "sweep": "65536-pt, [-5, 5]", "speed": { - "fr_math_ns_per_call": 3.0, - "libfixmath_ns_per_call": 64.7, - "fr_math_speedup": 21.28, + "fr_math_ns_per_call": 4.0, + "libfixmath_ns_per_call": 84.6, + "fr_math_speedup": 21.04, "faster": "fr_math" }, 
"accuracy_vs_double": { @@ -285,9 +285,9 @@ "double_reference": "std::log", "sweep": "65536-pt, [0.01, 100]", "speed": { - "fr_math_ns_per_call": 9.0, - "libfixmath_ns_per_call": 453.2, - "fr_math_speedup": 50.53, + "fr_math_ns_per_call": 11.2, + "libfixmath_ns_per_call": 583.3, + "fr_math_speedup": 51.87, "faster": "fr_math" }, "accuracy_vs_double": { @@ -315,9 +315,9 @@ "double_reference": "std::log2", "sweep": "65536-pt, [0.01, 100]", "speed": { - "fr_math_ns_per_call": 8.5, - "libfixmath_ns_per_call": 39.4, - "fr_math_speedup": 4.63, + "fr_math_ns_per_call": 11.3, + "libfixmath_ns_per_call": 51.1, + "fr_math_speedup": 4.53, "faster": "fr_math" }, "accuracy_vs_double": { @@ -345,8 +345,8 @@ "double_reference": "double a*b", "sweep": "65536-pt, a in [-50,50], b in [-2,2]", "speed": { - "fr_math_ns_per_call": 0.9, - "libfixmath_ns_per_call": 1.2, + "fr_math_ns_per_call": 1.2, + "libfixmath_ns_per_call": 1.6, "fr_math_speedup": 1.33, "faster": "fr_math" }, @@ -375,9 +375,9 @@ "double_reference": "double a/b", "sweep": "65536-pt, a/b in [-50,50]/[0.5,50]", "speed": { - "fr_math_ns_per_call": 0.9, - "libfixmath_ns_per_call": 5.3, - "fr_math_speedup": 6.10, + "fr_math_ns_per_call": 1.2, + "libfixmath_ns_per_call": 6.9, + "fr_math_speedup": 5.96, "faster": "fr_math" }, "accuracy_vs_double": { @@ -406,7 +406,7 @@ "double_reference": "std::hypot", "sweep": "65536-pt, 5 radii x 360 deg", "speed": { - "fr_math_ns_per_call": 19.9 + "fr_math_ns_per_call": 26.2 }, "accuracy_vs_double": { "fr_math": { @@ -425,7 +425,7 @@ "double_reference": "std::hypot", "sweep": "65536-pt, 5 radii x 360 deg", "speed": { - "fr_math_ns_per_call": 2.6 + "fr_math_ns_per_call": 3.2 }, "accuracy_vs_double": { "fr_math": { diff --git a/compare_lfm/comparison_summary.md b/compare_lfm/comparison_summary.md index e547ce4..0f467ad 100644 --- a/compare_lfm/comparison_summary.md +++ b/compare_lfm/comparison_summary.md @@ -25,7 +25,7 @@ All errors measured vs IEEE 754 double. Pct errors skip |ref| < 0.01. 
|----------|----------:|---------:|---------:|----------:|---------:|---------:|--------| | sin | 4.9 | 0.4816 | 0.0081 | 507.6 | 74.5513 | 0.6105 | FR | | cos | 4.4 | 0.3282 | 0.0077 | 508.3 | 74.4001 | 0.6121 | FR | -| tan | 13.0 | 0.1551 | 0.0055 | 1196.0 | 0.7099 | 0.0410 | FR | +| tan | 33.2 | 0.5850 | 0.0122 | 1196.0 | 0.7099 | 0.0410 | FR | | asin | 24.9 | 1.9776 | 0.0477 | 667.1 | 20.1233 | 2.4452 | FR | | acos | 24.6 | 0.2724 | 0.0093 | 667.8 | 15.3142 | 0.3475 | FR | | atan | 59.9 | 0.2149 | 0.0061 | 666.3 | 19.8632 | 0.4571 | FR | @@ -43,21 +43,21 @@ All errors measured vs IEEE 754 double. Pct errors skip |ref| < 0.01. | Function | FR_math | libfixmath | Speedup | Faster | |----------|--------:|-----------:|--------:|--------| -| sin | 5.6 | 10.6 | 1.91x | FR | -| cos | 8.9 | 13.3 | 1.50x | FR | -| tan | 7.1 | 32.6 | 4.57x | FR | -| asin | 9.7 | 49.5 | 5.11x | FR | -| acos | 8.4 | 50.7 | 6.03x | FR | -| atan | 8.1 | 11.0 | 1.37x | FR | -| atan2 | 15.9 | 10.9 | 0.69x | lfm | -| sqrt | 18.6 | 19.9 | 1.07x | FR | -| exp | 3.0 | 64.7 | 21.28x | FR | -| ln | 9.0 | 453.2 | 50.53x | FR | -| log2 | 8.5 | 39.4 | 4.63x | FR | -| mul | 0.9 | 1.2 | 1.33x | FR | -| div | 0.9 | 5.3 | 6.10x | FR | -| hypot | 19.9 | --- | --- | FR only | -| hypot_fast8 | 2.6 | --- | --- | FR only | +| sin | 7.4 | 12.7 | 1.72x | FR | +| cos | 9.9 | 13.2 | 1.34x | FR | +| tan | 14.1 | 37.2 | 2.64x | FR | +| asin | 11.9 | 64.0 | 5.38x | FR | +| acos | 11.1 | 65.0 | 5.88x | FR | +| atan | 10.8 | 14.8 | 1.36x | FR | +| atan2 | 20.8 | 13.7 | 0.66x | lfm | +| sqrt | 24.8 | 26.2 | 1.05x | FR | +| exp | 4.0 | 84.6 | 21.04x | FR | +| ln | 11.2 | 583.3 | 51.87x | FR | +| log2 | 11.3 | 51.1 | 4.53x | FR | +| mul | 1.2 | 1.6 | 1.33x | FR | +| div | 1.2 | 6.9 | 5.96x | FR | +| hypot | 26.2 | --- | --- | FR only | +| hypot_fast8 | 3.2 | --- | --- | FR only | ### Summary (13 head-to-head functions) diff --git a/dev/misc/FR_math.c.checkpoint3 b/dev/misc/FR_math.c.checkpoint3 new file mode 100644 index 
0000000..45c75c2 --- /dev/null +++ b/dev/misc/FR_math.c.checkpoint3 @@ -0,0 +1,1705 @@ +/** + * + * @file FR_math.c - c implementation file for basic fixed + * radix math routines + * + * @copy Copyright (C) <2001-2026> + * @author M A Chatterjee + * + * This file contains integer math settable fixed point radix math routines for + * use on systems in which floating point is not desired or unavailable. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, please place an acknowledgment in the product documentation. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source + * distribution. + * + */ + +#include "FR_math.h" +#include "FR_trig_table.h" + +#ifndef FR_NO_STDINT +#include +#endif + +/*======================================================= + * Full-precision radian/degree → BAM conversion helpers + * + * rad_to_bam_full(r) returns a full s32 BAM value where: + * upper 16 bits = integer BAM (the u16 table index) + * lower 16 bits = sub-BAM fractional part + * Input r must already be normalized to radix 16 and reduced to [-pi, pi]. + * + * The shift terms match FR_RAD2BAM (10 terms, ~21-bit accuracy) but are + * reordered so intermediate sums stay within s32 for |r| <= pi at r16. + */ +static s32 rad_to_bam_full(s32 r) +{ + /* 10 terms: 65536/(2*pi) ≈ 10430.37835... 
+ * 2^13 + 2^11 + 2^7 + 2^6 - 2 + 0.5 - 0.125 + 2^-8 - 2^-11 - 2^-14 + * = 10430.378357 (~21-bit accuracy) + * Terms reordered: interleave negatives early to keep all intermediate + * sums within s32 for |r| <= pi at r16 (max result ≈ 2^31 - 4K). */ + return (r<<13)-(r<<1)+(r<<11)-(r>>3)+(r<<7)+(r<<6)+(r>>1)+(r>>8)-(r>>11)-(r>>14); +} + +/* deg_to_bam_full(d) — same idea for degrees. + * Input d must already be normalized to radix 16 and reduced to [-90, 90). + * Returns full s32 BAM (upper 16 = integer BAM, lower 16 = sub-BAM). + * 7 terms, ~18-bit accuracy matching FR_DEG2BAM. */ +static s32 deg_to_bam_full(s32 d) +{ + return (d<<7)+(d<<6)-(d<<3)-(d<<1)+(d>>5)+(d>>6)-(d>>9); +} + +/* Normalize a fixed-radix value to radix 16. */ +static s32 normalize_to_r16(s32 val, u16 radix) +{ + return (radix > 16) ? (val >> (radix - 16)) + : (radix < 16) ? (val << (16 - radix)) + : val; +} + +/* Reduce non-negative radian (at r16) to [0, 2*pi). + * Helper used by range_reduce_rad and the near-pi small-angle paths. */ +static s32 reduce_to_2pi(s32 r) +{ + const s32 two_pi = FR_TWO_PI(16); /* 411775 */ + if (r > (two_pi << 1)) + r -= (r / two_pi) * two_pi; + else if (r > two_pi) + r -= two_pi; + return r; +} + +/* Range-reduce radian value (at r16, non-negative) to [-pi, pi]. + * Caller guarantees r >= 0 (sign is handled externally). */ +static s32 range_reduce_rad(s32 r) +{ + r = reduce_to_2pi(r); + if (r > FR_PI(16)) + r -= FR_TWO_PI(16); + return r; +} + +/* fr_rad_to_bam — overflow-safe radian to u16 BAM conversion. + * Normalizes to r16, reduces via positive-only path, applies shift-only multiply. + * Handles inputs beyond ±2*pi with modulus (slow path). */ +u16 fr_rad_to_bam(s32 rad, u16 radix) +{ + s32 r = normalize_to_r16(rad, radix); + /* BAM wraps naturally in u16, but range_reduce expects non-negative. + * For negative r: bam(-x) = -bam(x) mod 65536, so negate and let u16 wrap. 
*/ + s32 sign = 1; + if (r < 0) { r = -r; sign = -1; } + r = range_reduce_rad(r); + s32 bam_full = rad_to_bam_full(r); + if (sign < 0) bam_full = -bam_full; + return (u16)((bam_full + (1 << 15)) >> 16); +} + +/* fr_deg_to_bam — overflow-safe degree to u16 BAM conversion. + * Normalizes to r16, reduces to [-90, 90) with quadrant offset. */ +u16 fr_deg_to_bam(s32 deg, u16 radix) +{ + s32 d = normalize_to_r16(deg, radix); + + /* Reduce to [-180, 180) */ + if (d >= FR_D360_R16 || d < -FR_D360_R16) { + s32 n = d / FR_D360_R16; + d -= n * FR_D360_R16; + } + if (d >= FR_D180_R16) d -= FR_D360_R16; + if (d < -FR_D180_R16) d += FR_D360_R16; + + /* Reduce to [-90, 90) with BAM quadrant offset */ + u16 offset = 0; + if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = 32768; } + else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = 32768; } + + return (u16)(offset + (u16)((deg_to_bam_full(d) + (1 << 15)) >> 16)); +} + +/*======================================================= + * BAM-native trig: fr_sin_bam, fr_cos_bam, fr_cos, fr_sin, fr_tan + * + * Internal model: every angle is reduced to a u16 BAM value. The top 2 bits + * select the quadrant, the bottom 14 bits are the in-quadrant position. Odd + * quadrants (1, 3) reverse the in-quadrant index so the table is always read + * in the same direction. + * + * The table is a 129-entry SINE quadrant (ascending: 0 at index 0, 32768 at + * index 128). After mirroring, small full_pos → small output (near zero), + * which enables a cheap small-angle approximation: sin(θ) ≈ θ for angles + * below one table step (~0.7°). This eliminates table quantization error + * in the region where it matters most. + * + * Sign rule: quadrants 2 and 3 negate the result. + * Mirror rule: quadrants 1 and 3 flip the in-quadrant position. 
+ */ +s32 fr_sin_bam(u16 bam) +{ + u32 q = ((u32)bam >> 14) & 0x3; /* top 2 bits = quadrant */ + u32 inq = (u32)bam & (FR_TRIG_QUADRANT - 1); /* bottom 14 bits */ + + /* Exact cardinal angles */ + if (inq == 0) { + if (q == 0 || q == 2) return 0; /* 0° or 180° → 0 */ + if (q == 1) return FR_TRIG_ONE; /* 90° → 1.0 */ + return -FR_TRIG_ONE; /* 270° → -1.0 */ + } + + /* Odd quadrants mirror: read table from the far end */ + if (q == 1 || q == 3) + inq = FR_TRIG_QUADRANT - inq; + + s32 v; + + /* Small-angle approximation: sin(θ) ≈ θ for inq < 128 (one table step). + * θ_rad = inq * (π/2) / 16384. Output = θ * 65536 = inq * FR_kQ2RAD / 16384. + * Max inq=127: 127 * 102944 / 16384 = 798. Error: θ³/6 < 3e-7 << 1 LSB. */ + if (inq < FR_TRIG_FRAC_MAX) { + v = (s32)(((u32)inq * 102944u + 8192u) >> 14); + } else { + /* Table lookup with 7-bit interpolation fraction */ + u32 idx = inq >> FR_TRIG_FRAC_BITS; + u32 frac = inq & FR_TRIG_FRAC_MASK; + s32 lo = (s32)gFR_SIN_TAB_Q[idx]; + s32 hi = (s32)gFR_SIN_TAB_Q[idx + 1]; + v = lo + (((hi - lo) * (s32)frac + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS); + v <<= 1; /* u0.15 → s15.16 */ + } + + return (q >= 2) ? -v : v; +} + +s32 fr_cos_bam(u16 bam) +{ + /* cos(x) = sin(x + pi/2) = sin(bam + 16384). u16 wraparound is free. */ + return fr_sin_bam((u16)(bam + FR_BAM_QUADRANT)); +} + +s32 fr_cos(s32 rad, u16 radix) +{ + if (rad == 0) return FR_TRIG_ONE; + s32 r = normalize_to_r16(rad, radix); + if (r < 0) r = -r; + r = reduce_to_2pi(r); + /* Near π/2 or 3π/2 (cos=0 crossings): cos(π/2+δ) = -sin(δ) ≈ -δ, + * cos(3π/2+δ) = sin(δ) ≈ δ. 
*/ + s32 delta = r - FR_HALF_PI(16); + if (delta >= -256 && delta <= 256) + return -delta; + delta = r - FR_THREE_HALF_PI(16); + if (delta >= -256 && delta <= 256) + return delta; + return fr_cos_bam(fr_rad_to_bam(rad, radix)); +} + +s32 fr_sin(s32 rad, u16 radix) +{ + if (rad == 0) return 0; + s32 r = normalize_to_r16(rad, radix); + s32 sign = 1; + if (r < 0) { r = -r; sign = -1; } + r = reduce_to_2pi(r); + /* Near 0 after reduction: sin(δ) ≈ δ */ + if (r < 256) { + s32 v = r; + return (sign < 0) ? -v : v; + } + /* Near π: sin(π + δ) = -sin(δ) ≈ -δ */ + s32 delta = r - FR_PI(16); + if (delta >= -256 && delta <= 256) { + s32 v = -delta; + return (sign < 0) ? -v : v; + } + /* Near 2π: sin(2π - δ) = -sin(δ) ≈ -δ, but δ = 2π - r */ + delta = FR_TWO_PI(16) - r; + if (delta >= 0 && delta < 256) { + s32 v = -delta; + return (sign < 0) ? -v : v; + } + /* Main path: reduce to [-π, π], convert to u16 BAM, table lookup */ + if (r > FR_PI(16)) r -= FR_TWO_PI(16); + u16 bam = (u16)((rad_to_bam_full(r) + (1 << 15)) >> 16); + s32 v = fr_sin_bam(bam); + return (sign < 0) ? -v : v; +} + +/*======================================================= + * BAM-native tangent: fr_tan_bam + * + * Uses a 65-entry octant table (gFR_TAN_TAB_O) for the first octant + * [0, 45°] and the reciprocal identity tan(x) = 1/tan(90°-x) for the + * second octant (45°, 90°). Result is s15.16 with saturation at the + * poles. + * + * No 64-bit intermediates. One 32-bit division only in the >45° path. + */ +s32 fr_tan_bam(u16 bam) +{ + u32 q = ((u32)bam >> 14) & 0x3; /* quadrant (top 2 bits) */ + u32 inq = (u32)bam & 0x3FFFu; /* in-quadrant (14 bits) */ + s32 sign = 1; + u32 idx, frac; + s32 lo, hi, raw; + + /* Exact zeros: bam lands exactly on 0° or 180° */ + if (inq == 0 && (q == 0 || q == 2)) + return 0; + + /* Poles: bam lands exactly on 90° or 270° */ + if (inq == 0 && (q == 1 || q == 3)) + return (q == 1) ? 
FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + + /* Q1 (90°..180°) and Q3 (270°..360°): reflect and negate */ + if (q == 1 || q == 3) { + inq = 0x4000u - inq; + sign = -1; + } + + /* Now inq is in (0, 0x4000) = (0°, 90°) exclusive. + * Split into first octant [0, 45°) and second octant [45°, 90°). */ + if (inq < FR_TAN_OCTANT) { + /* First octant: direct table lookup + lerp. + * inq is 13 bits; top FR_TAN_TABLE_BITS index the table, + * bottom FR_TAN_FRAC_BITS drive interpolation. */ + idx = inq >> FR_TAN_FRAC_BITS; + frac = inq & FR_TAN_FRAC_MASK; + lo = (s32)gFR_TAN_TAB_O[idx]; + hi = (s32)gFR_TAN_TAB_O[idx + 1]; + raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + + if (raw < 0x40) { + /* Near zero: redo interpolation with 4 extra bits of + * precision to reduce rounding error when result is small. */ + s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; + s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; + raw = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + raw = (raw + 4) >> 3; /* u0.19 → s15.16 with rounding */ + } else { + raw <<= 1; /* u0.15 → s15.16 */ + } + } else { + /* Second octant: tan(x) = 1 / tan(90° - x). + * complement is in (0, 0x2000] = (0°, 45°]. */ + u32 comp = 0x4000u - inq; + + /* Look up tan(complement) from the table */ + idx = comp >> FR_TAN_FRAC_BITS; + frac = comp & FR_TAN_FRAC_MASK; + lo = (s32)gFR_TAN_TAB_O[idx]; + hi = (s32)gFR_TAN_TAB_O[idx + 1]; + raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + + if (raw < 0x40) { + /* Near pole: redo interpolation with 4 extra bits of + * precision. The reciprocal amplifies small interpolation + * errors, so extra precision significantly helps here. + * Result: (2^31 / raw_hp) << 4 = 2^35 / raw_hp. 
*/ + s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; + s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; + s32 raw_hp = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + if (raw_hp < 32) { + raw = FR_TRIG_MAXVAL; + } else { + raw = (s32)((0x80000000u / (u32)raw_hp) << 4); + } + } else { + raw = (s32)(0x80000000u / (u32)raw); + } + } + + return (sign < 0) ? -raw : raw; +} + +/* fr_tan — radian-input tangent with full sub-BAM precision. + * + * Goes directly to the 65-entry octant tangent table with 16-bit + * interpolation precision. Sign from quadrant, magnitude from table. + * No s64 intermediates. One 32-bit division in the second-octant path. + * + * Architecture: + * 1. Sign: determined by quadrant of the BAM position (Q1/Q3=+, Q2/Q4=-) + * 2. Magnitude: from octant table lookup + reciprocal identity + * - First octant [0,45°): direct table lerp + * - Second octant [45°,90°): 1/tan(90°-x) via reciprocal + * 3. Return sign * magnitude */ + +/* Internal: given a full s32 BAM, compute |tan| directly from the table. + * Returns the unsigned magnitude (always >= 0). */ +static s32 tan_mag_from_bam_full(s32 bam_full) +{ + u16 bam0 = (u16)(bam_full >> 16); + u32 frac_sub = (u32)bam_full & 0xFFFFu; + + u32 q = ((u32)bam0 >> 14) & 0x3u; + u32 inq = (u32)bam0 & 0x3FFFu; + + /* Exact zeros: tan(0°) = tan(180°) = 0 */ + if (inq == 0 && frac_sub == 0 && (q == 0 || q == 2)) + return 0; + + /* Exact poles: tan(90°) = tan(270°) → saturate */ + if (inq == 0 && frac_sub == 0 && (q == 1 || q == 3)) + return FR_TRIG_MAXVAL; + + /* Mirror odd quadrants (Q1, Q3) into the [0, 90°) range. + * After this, full_pos represents distance from the nearest zero. 
*/ + u32 full_pos; + if (q == 1 || q == 3) + full_pos = ((u32)(0x4000u - inq) << 16) - frac_sub; + else + full_pos = ((u32)inq << 16) + frac_sub; + + /* Split at octant boundary (45° = 8192 BAM = 8192*65536 sub-BAM) */ + s32 raw; + if (full_pos < ((u32)FR_TAN_OCTANT << 16)) { + /* First octant [0, 45°): direct table lookup. + * 64 table intervals, each 2^23 sub-BAM units wide. */ + u32 idx = full_pos >> 23; + u32 frac16 = (full_pos >> 7) & 0xFFFFu; + + s32 lo = (s32)gFR_TAN_TAB_O[idx]; + s32 hi = (s32)gFR_TAN_TAB_O[idx + 1]; + raw = lo + (s32)(((s32)(hi - lo) * (s32)frac16 + (1 << 15)) >> 16); + + if (raw < 0x40) { + /* Near zero: redo with 4 extra bits of precision */ + s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; + s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; + raw = lo4 + (s32)(((s32)(hi4 - lo4) * (s32)frac16 + (1 << 15)) >> 16); + raw = (raw + 4) >> 3; /* u0.19 → s15.16 with rounding */ + } else { + raw <<= 1; /* u0.15 → s15.16 */ + } + } else { + /* Second octant [45°, 90°): tan(x) = 1 / tan(90° - x). + * Complement = distance from pole, in first-octant range. 
*/ + u32 comp = ((u32)FR_TRIG_QUADRANT << 16) - full_pos; + + u32 idx = comp >> 23; + u32 frac16 = (comp >> 7) & 0xFFFFu; + + s32 lo = (s32)gFR_TAN_TAB_O[idx]; + s32 hi = (s32)gFR_TAN_TAB_O[idx + 1]; + raw = lo + (s32)(((s32)(hi - lo) * (s32)frac16 + (1 << 15)) >> 16); + + if (raw < 0x40) { + /* Near pole: redo with 4 extra bits then reciprocal */ + s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; + s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; + s32 raw_hp = lo4 + (s32)(((s32)(hi4 - lo4) * (s32)frac16 + (1 << 15)) >> 16); + if (raw_hp < 32) + raw = FR_TRIG_MAXVAL; + else + raw = (s32)((0x80000000u / (u32)raw_hp) << 4); + } else { + raw = (s32)(0x80000000u / (u32)raw); + } + } + return raw; +} + +s32 fr_tan(s32 rad, u16 radix) +{ + if (rad == 0) return 0; + /* tan(-x) = -tan(x): factor out sign, reduce positive */ + s32 r = normalize_to_r16(rad, radix); + s32 tan_sign = 1; + if (r < 0) { r = -r; tan_sign = -1; } + r = reduce_to_2pi(r); + /* Near-π small angle: tan(π + δ) = tan(δ) ≈ δ. */ + s32 delta = r - FR_PI(16); + if (delta >= -256 && delta <= 256) { + return (tan_sign < 0) ? -delta : delta; + } + /* Full pipeline */ + if (r > FR_PI(16)) + r -= FR_TWO_PI(16); + s32 bam_full = rad_to_bam_full(r); + + /* Sign from quadrant of the BAM position */ + u32 q = ((u32)((u16)(bam_full >> 16)) >> 14) & 0x3u; + s32 sign = (q == 1 || q == 3) ? -tan_sign : tan_sign; + + s32 mag = tan_mag_from_bam_full(bam_full); + return (sign < 0) ? -mag : mag; +} + +/*======================================================= + * Degree-input trig: convert to u16 BAM via fr_deg_to_bam, then + * call the BAM-native functions. Cardinal angles are exact. 
+ */ + +s32 fr_cos_deg(s32 deg, u16 radix) +{ + if (radix == 0) return fr_cos_bam(FR_DEG2BAM_I(deg)); + if (deg < 0) deg = -deg; + /* Exact cardinal angles */ + s32 frac_mask = (1 << radix) - 1; + if ((deg & frac_mask) == 0) { + s32 rem = (deg >> radix) % 360; + if (rem == 0) return FR_TRIG_ONE; + if (rem == 90) return 0; + if (rem == 180) return -FR_TRIG_ONE; + if (rem == 270) return 0; + } + /* Near 90° or 270° (cos=0 crossings): cos(90+δ) = -sin(δ) ≈ -δ·π/180, + * cos(270+δ) = sin(δ) ≈ δ·π/180. Avoids BAM rounding error at zero. */ + s32 d = normalize_to_r16(deg, radix); + if (d >= FR_D360_R16) { s32 n = d / FR_D360_R16; d -= n * FR_D360_R16; } + { + const s32 DEG_THRESH = 14000; /* ~0.21° at r16 */ + s32 delta = d - FR_D90_R16; + if (delta >= -DEG_THRESH && delta <= DEG_THRESH) { + s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16); + return -dr; + } + delta = d - (FR_D90_R16 + FR_D180_R16); + if (delta >= -DEG_THRESH && delta <= DEG_THRESH) { + s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16); + return dr; + } + } + return fr_cos_bam(fr_deg_to_bam(deg, radix)); +} + +s32 fr_sin_deg(s32 deg, u16 radix) +{ + if (radix == 0) return fr_sin_bam(FR_DEG2BAM_I(deg)); + s32 sign = 1; + if (deg < 0) { deg = -deg; sign = -1; } + /* Exact cardinal angles */ + s32 frac_mask = (1 << radix) - 1; + if ((deg & frac_mask) == 0) { + s32 rem = (deg >> radix) % 360; + if (rem == 0) return 0; + if (rem == 90) return (sign < 0) ? -FR_TRIG_ONE : FR_TRIG_ONE; + if (rem == 180) return 0; + if (rem == 270) return (sign < 0) ? FR_TRIG_ONE : -FR_TRIG_ONE; + } + s32 v = fr_sin_bam(fr_deg_to_bam(deg, radix)); + return (sign < 0) ? -v : v; +} + +s32 FR_TanI(s32 deg) +{ + /* Exact pole: deg mod 180 == ±90. Sign matches input sign. */ + s32 rem = deg % 180; + if (rem == 90 || rem == -90) + return (deg > 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + return fr_tan_bam(FR_DEG2BAM_I(deg)); +} + +/* Internal: range-reduce degrees and produce full s32 BAM (used by fr_tan_deg). 
*/ +static s32 range_reduce_deg_bam_full(s32 deg, u16 radix) +{ + s32 d = normalize_to_r16(deg, radix); + if (d >= FR_D360_R16) { + s32 n = d / FR_D360_R16; + d -= n * FR_D360_R16; + } + if (d >= FR_D180_R16) d -= FR_D360_R16; + s32 offset = 0; + if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = (s32)0x80000000u; } + else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = (s32)0x80000000u; } + return offset + deg_to_bam_full(d); +} + +s32 fr_tan_deg(s32 deg, u16 radix) +{ + if (radix == 0) return FR_TanI(deg); + /* tan(-x) = -tan(x): factor out sign, reduce positive */ + s32 tan_sign = 1; + if (deg < 0) { deg = -deg; tan_sign = -1; } + /* Exact cardinal angles: tan is exactly 0 or ±MAXVAL */ + s32 frac_mask = (1 << radix) - 1; + if ((deg & frac_mask) == 0) { + s32 deg_int = deg >> radix; + s32 rem = deg_int % 180; + if (rem == 0) return 0; + if (rem == 90) return tan_sign > 0 ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + } + s32 bam_full = range_reduce_deg_bam_full(deg, radix); + u32 q = ((u32)((u16)(bam_full >> 16)) >> 14) & 0x3u; + s32 sign = (q == 1 || q == 3) ? -tan_sign : tan_sign; + s32 mag = tan_mag_from_bam_full(bam_full); + return (sign < 0) ? -mag : mag; +} + +/*======================================================= + * FR_FixMuls (x*y signed, NOT saturated, round-to-nearest) + * + * Treats x and y as fixed-point values at the same radix r and returns + * (x*y) >> r at radix r. The user is responsible for tracking the radix + * point and for guaranteeing the product fits in 32 bits. + * + * Adds 0.5 LSB (0x8000) before the shift so the result rounds to + * nearest instead of truncating toward zero. + */ +s32 FR_FixMuls(s32 x, s32 y) +{ + int64_t v = (int64_t)x * (int64_t)y; + return (s32)((v + 0x8000) >> 16); +} + +/*======================================================= + * FR_FixMulSat (x*y signed, SATURATED, round-to-nearest) + * + * Same semantics as FR_FixMuls but clamps to [INT32_MIN, INT32_MAX] on + * overflow instead of wrapping. 
The fixed-point radix is fixed at 16 bits + * (sM.16 inputs and output). Rounds to nearest (adds 0.5 LSB before shift). + */ +s32 FR_FixMulSat(s32 x, s32 y) +{ + int64_t v = ((int64_t)x * (int64_t)y + 0x8000) >> 16; + if (v > (int64_t)0x7fffffff) return FR_OVERFLOW_POS; + if (v < -(int64_t)0x80000000) return FR_OVERFLOW_NEG; + return (s32)v; +} + +/*======================================================= + FR_FixAddSat (x+y saturated add) + programmer must align radix points before using this function + */ +s32 FR_FixAddSat(s32 x, s32 y) +{ + s32 sum = x + y; + if (x < 0) + { + if (y < 0) + return (sum >= 0) ? FR_OVERFLOW_NEG : sum; + } + else + { + if (y >= 0) + return (sum <= 0) ? FR_OVERFLOW_POS : sum; + } + return sum; +} + +/* Inverse Trig + * acos with binary search of the BAM-native quadrant table. + * + * Algorithm: bring `input` into s0.15, then binary-search the first-quadrant + * cos table for the table entry closest to |input|. Apply quadrant mirror + * if input was negative. + */ +/* FR_acos — returns radians at out_radix. + * Range: [0, pi]. Input is a cosine value at the given radix. + * + * Uses the 129-entry sine table in reverse: binary-search the ascending + * table to find asin(|input|), then acos = pi/2 - asin (with sign handling + * for the second quadrant). + */ +s32 FR_acos(s32 input, u16 radix, u16 out_radix) +{ + s32 v; + s16 sign; + s32 lo, hi, mid; + s32 idx, d, num, frac; + s32 input_abs; + + /* Work with absolute value at the caller's radix */ + sign = (s16)((input < 0) ? 1 : 0); + input_abs = sign ? -input : input; + + /* Clamp at the caller's radix */ + { + s32 one = (s32)1 << radix; + if (input_abs >= one) + return sign ? FR_CHRDX(FR_kPI, FR_kPREC, out_radix) : 0; + } + + v = FR_CHRDX(input_abs, radix, FR_TRIG_PREC); /* |input| at s0.15 */ + + /* Small-angle fast path: when cos(θ) is close to 1.0, the sine table + * has poor resolution near the top (entries close together). + * Use acos(x) ≈ sqrt(2*(1-x)) instead. 
Threshold: v > sin_tab[121] + * means the input is > cos(7*π/256) ≈ 0.9963. */ + if (v > gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE - 8]) + { + s32 one = (s32)1 << radix; + s32 one_minus_x = one - input_abs; /* 1-|x| at caller radix */ + s32 two_omx = one_minus_x << 1; /* 2(1-|x|) at caller radix */ + s32 rad_native = FR_sqrt(two_omx, radix); /* radians at caller radix */ + s32 rad_out = FR_CHRDX(rad_native, radix, out_radix); + if (sign) + rad_out = FR_CHRDX(FR_kPI, FR_kPREC, out_radix) - rad_out; + return rad_out; + } + + /* Binary search on the ascending sine table. + * gFR_SIN_TAB_Q[0] = 0 (sin 0°), gFR_SIN_TAB_Q[128] = 32768 (sin 90°). + * + * Find the first index where table[idx] >= v. */ + lo = 0; + hi = FR_TRIG_TABLE_SIZE; + while (lo < hi) + { + mid = (lo + hi) >> 1; + if ((s32)gFR_SIN_TAB_Q[mid] < v) + lo = mid + 1; + else + hi = mid; + } + + /* lo is now the first index where table[lo] >= v. + * The bracketing interval is [lo-1, lo] with table[lo-1] < v <= table[lo]. + * This gives us the asin angle; acos = pi/2 - asin. */ + idx = lo; + if (idx <= 0) + { + idx = 0; + frac = 0; + } + else if (idx >= FR_TRIG_TABLE_SIZE) + { + idx = FR_TRIG_TABLE_SIZE - 1; + frac = 0; + } + else + { + /* Interpolate between table[idx-1] and table[idx]. + * d = table[idx] - table[idx-1] (>= 0, sin increasing) + * num = v - table[idx-1] (how far past table[idx-1]) + */ + d = (s32)gFR_SIN_TAB_Q[idx] - (s32)gFR_SIN_TAB_Q[idx - 1]; + num = v - (s32)gFR_SIN_TAB_Q[idx - 1]; + if (d > 0) + frac = ((num << FR_TRIG_FRAC_BITS) + (d >> 1)) / d; + else + frac = 0; + idx = idx - 1; + } + + { + /* asin_bam is the angle in first-quadrant BAM whose sin = v */ + u16 asin_bam = (u16)(((u32)idx << FR_TRIG_FRAC_BITS) + (u32)frac); + /* acos = pi/2 - asin (in BAM: quadrant - asin_bam) */ + u16 bam = (u16)(FR_TRIG_QUADRANT - asin_bam); + if (sign) + bam = (u16)(FR_BAM_HALF - bam); /* mirror: pi - angle */ + return FR_CHRDX(FR_Q2RAD(bam), 14, out_radix); + } +} + +/* FR_asin — returns radians at out_radix.
Range: [-pi/2, pi/2]. */ +s32 FR_asin(s32 input, u16 radix, u16 out_radix) +{ + /* asin(x) = pi/2 - acos(x) */ + s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); + return half_pi - FR_acos(input, radix, out_radix); +} + +/* FR_atan2(y, x, out_radix) — full-circle arctangent, returns radians + * at the specified output radix (s32). + * + * Range: [-pi, pi]. Returns 0 for atan2(0,0). + * + * Implementation: normalise (x,y) via FR_hypot_fast8, then recover the + * angle with FR_asin or FR_acos (both use the 129-entry sine table). + * To stay in the well-conditioned region of each inverse function we + * switch at 45°: + * |y| <= |x| → use asin(y/h) — asin stable near 0 + * |y| > |x| → use acos(x/h) — acos stable near pi/2 + * This keeps the derivative amplification factor below 1.414x everywhere. + */ +s32 FR_atan2(s32 y, s32 x, u16 out_radix) +{ + s32 ax, ay, h, q1_angle; + + /* Axis cases — exact angles, no divide. */ + if (x == 0) + { + if (y > 0) return FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); /* pi/2 */ + if (y < 0) return -FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); /* -pi/2 */ + return 0; + } + if (y == 0) + return (x > 0) ? 0 : FR_CHRDX(FR_kPI, FR_kPREC, out_radix); /* 0 or pi */ + + ax = (x < 0) ? -x : x; + ay = (y < 0) ? -y : y; + + /* Normalise so max(ax,ay) sits in [2^14, 2^16). This gives + * FR_hypot_fast8 enough integer bits for the shift-only segments + * to produce an accurate ratio — critical when the raw inputs are + * small (e.g. atan2(1,1) at radix 0). Scaling both by the same + * power of two doesn't change the angle. */ + { + s32 mx = (ax > ay) ? ax : ay; + while (mx < (1L << 14)) { ax <<= 1; ay <<= 1; mx <<= 1; } + while (mx >= (1L << 16)) { ax >>= 1; ay >>= 1; mx >>= 1; } + } + + h = FR_hypot_fast8((s32)ax, (s32)ay); + if (h == 0) return 0; /* degenerate */ + + /* Compute the first-quadrant angle (positive, [0..pi/2]). + * Divide produces a value in [0,1] at radix FR_TRIG_PREC (s0.15).
+ * + * Small-angle fast path: when the minor-axis ratio is small, + * asin(x) ≈ x (error < x³/6). Below ~5° the cubic term is + * smaller than the table-lookup error, so the direct identity + * is both faster and more accurate. Threshold 2753 at r15 + * corresponds to sin(~4.8°) = 0.084. */ + #define FR_ATAN2_SMALL 2753 + if (ay <= ax) + { + /* angle in [0°..45°]: use asin(ay/h) — well-conditioned near 0 */ + s32 sin_val = (s32)(((int64_t)ay << FR_TRIG_PREC) / h); + if (sin_val < FR_ATAN2_SMALL) + q1_angle = FR_CHRDX(sin_val, FR_TRIG_PREC, out_radix); + else + q1_angle = FR_asin(sin_val, FR_TRIG_PREC, out_radix); + } + else + { + /* angle in [45°..90°]: use acos(ax/h) — well-conditioned near pi/2 */ + s32 cos_val = (s32)(((int64_t)ax << FR_TRIG_PREC) / h); + if (cos_val < FR_ATAN2_SMALL) + { + /* angle ≈ pi/2 - cos_val (symmetric small-angle identity) */ + s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); + q1_angle = half_pi - FR_CHRDX(cos_val, FR_TRIG_PREC, out_radix); + } + else + q1_angle = FR_acos(cos_val, FR_TRIG_PREC, out_radix); + } + + /* Apply quadrant from signs of x and y. + * q1_angle is always positive [0..pi/2]. */ + { + s32 pi = FR_CHRDX(FR_kPI, FR_kPREC, out_radix); + if (x > 0) + return (y > 0) ? q1_angle : -q1_angle; + /* x < 0: mirror across y-axis */ + return (y > 0) ? (pi - q1_angle) : (q1_angle - pi); + } +} + +/* FR_atan(input, radix, out_radix) — arctangent of a single argument. + * Returns radians at out_radix, range [-pi/2, pi/2]. + */ +s32 FR_atan(s32 input, u16 radix, u16 out_radix) +{ + s32 one = (s32)1 << radix; + return FR_atan2(input, one, out_radix); +} + +/* 2^f table for f in [0, 1] in 65 entries (64 segments), output in s.16 + * fixed point. Entry i = round(2^(i/64) * 65536). Size: 260 bytes. + * Used by FR_pow2 to look up the fractional power of 2 with linear + * interpolation. 
+ */ +static const u32 gFR_POW2_FRAC_TAB[65] = { + 65536, 66250, 66971, 67700, 68438, 69183, 69936, 70698, + 71468, 72246, 73032, 73828, 74632, 75444, 76266, 77096, + 77936, 78785, 79642, 80510, 81386, 82273, 83169, 84074, + 84990, 85915, 86851, 87796, 88752, 89719, 90696, 91684, + 92682, 93691, 94711, 95743, 96785, 97839, 98905, 99982, + 101070, 102171, 103283, 104408, 105545, 106694, 107856, 109031, + 110218, 111418, 112631, 113858, 115098, 116351, 117618, 118899, + 120194, 121502, 122825, 124163, 125515, 126882, 128263, 129660, + 131072 +}; + +/* FR_pow2(input, radix) — computes 2^(input/2^radix), result at same radix. + * + * Algorithm: split input into floor(integer) and fractional part. The + * fractional part is in [0, 1) by construction (Euclidean / mathematical + * floor — the fractional part of -2.3 is +0.7, not -0.3). Then + * 2^(int + frac) = 2^int * 2^frac + * where 2^frac is looked up from a 65-entry table at radix 16, and 2^int + * is a shift. + * + * Worst-case absolute error: ~1e-5 over [-8, 8] (65-entry table). + * Linear interpolation leaves a small concavity error in each interval. + */ +s32 FR_pow2(s32 input, u16 radix) +{ + s32 flr, frac_full, idx, frac_lo, lo, hi, mant, result; + u32 mask = (radix > 0) ? (((u32)1 << radix) - 1) : 0; + + /* Mathematical floor: for positive input it's input>>radix; for + * negative input we need to round toward -infinity, not toward zero. + */ + if (input >= 0) + { + flr = (s32)((u32)input >> radix); + frac_full = (s32)((u32)input & mask); + } + else + { + s32 neg = -input; + s32 nflr = (s32)((u32)neg >> radix); + s32 nfrc = (s32)((u32)neg & mask); + if (nfrc == 0) + { + flr = -nflr; + frac_full = 0; + } + else + { + flr = -nflr - 1; /* floor toward -inf */ + frac_full = (s32)((1L << radix) - nfrc); + } + } + + /* frac_full is in [0, 2^radix). Re-radix it to s.16 for table lookup. 
*/ + if (radix > 16) + frac_full >>= (radix - 16); + else if (radix < 16) + frac_full <<= (16 - radix); + /* now frac_full is in [0, 65536) representing fractional in s.16. */ + + /* Top 6 bits index the table; bottom 10 are the interpolation fraction. */ + idx = frac_full >> 10; + frac_lo = frac_full & ((1L << 10) - 1); + lo = (s32)gFR_POW2_FRAC_TAB[idx]; + hi = (s32)gFR_POW2_FRAC_TAB[idx + 1]; + mant = lo + (((hi - lo) * frac_lo) >> 10); /* mant in s.16, in [1.0, 2.0) */ + + /* Apply integer shift. mant is at radix 16. We want output at `radix`. + * If radix == 16: just shift mant. + * Otherwise re-radix mant first. + */ + if (flr >= 0) + { + /* result = mant << flr, then re-radix to caller's radix. */ + if (flr >= 30) + return FR_OVERFLOW_POS; + result = mant << flr; + return FR_CHRDX(result, 16, radix); + } + else + { + /* mant >> -flr at radix 16, then re-radix. */ + s32 sh = -flr; + if (sh >= 30) + return 0; /* underflow */ + result = mant >> sh; + return FR_CHRDX(result, 16, radix); + } +} + +/* log2 mantissa table for m in [1, 2), m = 1 + i/64, returning log2(m) + * in s.16 fixed point. 65 entries (last is log2(2) = 1.0 = 65536) so the + * interpolation between idx and idx+1 never reads out of bounds. + * Size: 260 bytes. Entry i = round(log2(1 + i/64) * 65536). + */ +static const u32 gFR_LOG2_MANT_TAB[65] = { + 0, 1466, 2909, 4331, 5732, 7112, 8473, 9814, + 11136, 12440, 13727, 14996, 16248, 17484, 18704, 19909, + 21098, 22272, 23433, 24579, 25711, 26830, 27936, 29029, + 30109, 31178, 32234, 33279, 34312, 35334, 36346, 37346, + 38336, 39316, 40286, 41246, 42196, 43137, 44068, 44990, + 45904, 46809, 47705, 48593, 49472, 50344, 51207, 52063, + 52911, 53751, 54584, 55410, 56229, 57040, 57845, 58643, + 59434, 60219, 60997, 61769, 62534, 63294, 64047, 64794, + 65536 +}; + +/* FR_log2(input, radix, output_radix) — log base 2 of a fixed-point number. + * + * input : value to take log2 of, treated as a positive sM.radix value. 
+ * radix : number of fractional bits in `input`. + * output_radix : number of fractional bits in the result. + * + * Returns FR_LOG2MIN for input <= 0 (log of zero/negative is undefined; we + * return a large negative sentinel rather than crash). + * + * Algorithm: + * 1. Find p, the position of the leading 1 bit of `input`. + * log2(input) = p + log2(input / 2^p), where the second term is in + * [0, 1) because (input / 2^p) is in [1, 2). + * 2. Normalize the mantissa to s1.31 by shifting `input` so its top bit + * sits at bit 31 (so bits 30..25 are the upper 6 bits of m-1). + * 3. Look up log2(m) in the 65-entry table with linear interpolation + * across the next 24 bits. Result is in s.16. + * 4. integer_part = (p - radix), then result = (integer_part << 16) + + * mantissa_log2. + * 5. Re-radix to the requested output_radix via FR_CHRDX. + * + * Worst-case absolute error: ~6e-5 in log2 units (65-entry table). + */ +s32 FR_log2(s32 input, u16 radix, u16 output_radix) +{ + s32 p, integer_part, idx, frac, lo, hi, mant_log2, result; + u32 m, u; + + if (input <= 0) + return FR_LOG2MIN; + + /* Step 1: find the position of the leading 1 bit. */ + u = (u32)input; + p = 0; + while (u > 1) + { + u >>= 1; + p++; + } + + /* Step 2: shift input so the leading 1 bit is at bit 30 (s1.30 mantissa). + * Equivalently: m = input << (30 - p), where m is in [2^30, 2^31). + * The fractional part of m / 2^30 is in [0, 1), and that's what we look + * up in the table. + */ + if (p >= 30) + m = (u32)input >> (p - 30); + else + m = (u32)input << (30 - p); + + /* m is now in [2^30, 2^31). Subtract 2^30 to get the fractional part + * (m_frac in [0, 2^30)). Index into the 64-entry table is the top 6 + * bits of m_frac; the lower 24 bits are the interpolation fraction. 
+ */ + m -= (1u << 30); + idx = (s32)(m >> 24); /* 6 bits */ + frac = (s32)(m & ((1u << 24) - 1)); /* 24 bits */ + lo = (s32)gFR_LOG2_MANT_TAB[idx]; + hi = (s32)gFR_LOG2_MANT_TAB[idx + 1]; + mant_log2 = lo + (s32)(((int64_t)(hi - lo) * frac) >> 24); + + /* Step 3: assemble. integer_part = p - radix. */ + integer_part = p - (s32)radix; + result = (integer_part << 16) + mant_log2; + + /* Step 4: re-radix to output_radix. */ + return FR_CHRDX(result, 16, output_radix); +} + +s32 FR_ln(s32 input, u16 radix, u16 output_radix) +{ + s32 r = FR_log2(input, radix, output_radix); + return FR_MULK28(r, FR_krLOG2E_28); +} + +s32 FR_log10(s32 input, u16 radix, u16 output_radix) +{ + s32 r = FR_log2(input, radix, output_radix); + return FR_MULK28(r, FR_krLOG2_10_28); +} + +#ifndef FR_NO_PRINT +/*************************************** + * FR_printNumD - write a decimal integer with space padding. + * + * Equivalent to "%*d" in printf, modulo the return convention. + * + * f : per-character output function (e.g. putchar). Must not be NULL. + * n : signed integer to print. + * pad : minimum field width; spaces are prepended to reach this width. + * + * Returns the number of characters written on success, or -1 if `f` is NULL. + */ +int FR_printNumD(int (*f)(char), int n, int pad) +{ + unsigned int mag; + int written = 0, neg = 0; + int digits = 1; + unsigned int t; + + if (!f) + return -1; + + if (n < 0) + { + neg = 1; + mag = (unsigned int)(-(long)n); /* safe for INT_MIN */ + } + else + { + mag = (unsigned int)n; + } + + /* Count decimal digits in mag (always at least 1 for n=0). */ + t = mag; + while (t >= 10) + { + t /= 10; + digits++; + } + + /* Pad with spaces. The total width includes the sign. */ + { + int total = digits + (neg ? 1 : 0); + while (pad-- > total) + { + f(' '); + written++; + } + } + + if (neg) + { + f('-'); + written++; + } + + /* Print digits MSB first by computing the largest power of 10 <= mag. 
*/ + { + unsigned int p = 1; + int i; + for (i = 1; i < digits; i++) + p *= 10; + while (p > 0) + { + f((char)('0' + (mag / p) % 10)); + written++; + if (p == 1) + break; + p /= 10; + } + } + + return written; +} + +/*************************************** + * FR_printNumF - write a fixed-point number as a decimal floating-point string. + * + * f : per-character output function. Must not be NULL. + * n : signed fixed-point value at the given radix. + * radix : number of fractional bits in `n`. Not range-checked; must be in + * [0, 31] for the shifts below to be defined. + * pad : minimum field width (including sign and decimal point). + * prec : number of fractional digits to print. + * + * Returns the number of characters written on success, -1 if `f` is NULL. + * + * Rounding policy: truncates fractional digits beyond `prec` (no rounding). + */ +int FR_printNumF(int (*f)(char), s32 n, int radix, int pad, int prec) +{ + u32 mag_int; /* u32 rather than unsigned int, which may be only 16 bits wide */ + u32 mag_frac; + u32 frac_mask; + int written = 0, neg = 0; + int int_digits = 1; + int total; + u32 t; + + if (!f) + return -1; + + frac_mask = (radix > 0) ? (((u32)1 << radix) - 1) : 0; + + if (n < 0) + { + neg = 1; + /* Negate as unsigned to avoid INT_MIN overflow. */ + u32 un = (u32)(-(int64_t)n); + mag_int = un >> radix; + mag_frac = un & frac_mask; + } + else + { + mag_int = (u32)n >> radix; + mag_frac = (u32)n & frac_mask; + } + + /* Count integer digits. */ + t = mag_int; + while (t >= 10) + { + t /= 10; + int_digits++; + } + + /* Total visible width = sign + int + (dot + prec digits if prec>0). */ + total = int_digits + (neg ? 1 : 0) + ((prec > 0) ? (1 + prec) : 0); + while (pad-- > total) + { + f(' '); + written++; + } + + if (neg) + { + f('-'); + written++; + } + + /* Print integer part. */ + { + u32 p = 1; + int i; + for (i = 1; i < int_digits; i++) + p *= 10; + while (p > 0) + { + f((char)('0' + (mag_int / p) % 10)); + written++; + if (p == 1) + break; + p /= 10; + } + } + + /* Print fractional part.
Extract one decimal digit at a time: + * frac' = frac * 10 + * digit = frac' >> radix + * frac = frac' & frac_mask + * frac' is held in 64 bits: for radix > 28 the product frac * 10 no + * longer fits in 32 bits, and truncating it corrupts the digits. + */ + if (prec > 0) + { + f('.'); + written++; + while (prec-- > 0) + { + uint64_t scaled; + int digit; + scaled = (uint64_t)mag_frac * 10; + digit = (int)(scaled >> radix); + mag_frac = (u32)(scaled & frac_mask); + f((char)('0' + (digit % 10))); + written++; + } + } + + return written; +} + +/*************************************** + * FR_printNumH - write an integer as hexadecimal. + * + * f : per-character output function. Must not be NULL. + * n : integer to print (interpreted as unsigned for the digits). + * showPrefix : if non-zero, prepend "0x". + * + * Always emits 2 * sizeof(int) lowercase hex digits, zero-padded on the left. + * + * Returns the number of characters written on success, -1 if f is NULL. + */ +int FR_printNumH(int (*f)(char), int n, int showPrefix) +{ + unsigned int u = (unsigned int)n; + int written = 0; + int x = (int)((sizeof(int) << 1) - 1); + int d; + + if (!f) + return -1; + + if (showPrefix) + { + f('0'); + f('x'); + written += 2; + } + + do + { + d = (int)((u >> (x << 2)) & 0xf); + d = (d > 9) ? (d - 0xa + 'a') : (d + '0'); + f((char)d); + written++; + } while (x--); + + return written; +} + +/*======================================================= + * FR_numstr — parse a decimal string into a fixed-point value. + * + * This is the runtime inverse of FR_printNumF: given a string like + * "12.34" or "-0.05" and a radix (number of fractional bits), it + * returns the s32 fixed-point representation. + * + * Features: + * - Leading whitespace is skipped. + * - Optional sign ('+' or '-'). + * - Up to 9 fractional digits are used (s32 range). + * - No malloc, no strtod, no libm. + * + * Returns 0 for NULL or empty input.
+ * + * NOTE(review): there are no overflow checks - the integer part accumulates + * in s32 and is shifted left by `radix` unchecked - and the fractional part + * is truncated (not rounded) by the integer division. + */ +s32 FR_numstr(const char *s, u16 radix) +{ + static const s32 pow10[10] = { + 1L, 10L, 100L, 1000L, 10000L, + 100000L, 1000000L, 10000000L, 100000000L, 1000000000L + }; + s32 int_part = 0, frac_part = 0; + int frac_digits = 0, neg = 0; + s32 result; + + if (!s || !*s) return 0; + + while (*s == ' ' || *s == '\t') s++; /* skip whitespace */ + if (*s == '-') { neg = 1; s++; } /* sign */ + else if (*s == '+') { s++; } + + while (*s >= '0' && *s <= '9') /* integer part */ + { int_part = int_part * 10 + (*s - '0'); s++; } + + if (*s == '.') { /* fractional part */ + s++; + while (*s >= '0' && *s <= '9') { + if (frac_digits < 9) + { frac_part = frac_part * 10 + (*s - '0'); frac_digits++; } + s++; + } + } + + result = int_part << radix; + if (frac_digits > 0) + result += (s32)(((int64_t)frac_part << radix) / pow10[frac_digits]); + + return neg ? -result : result; +} +#endif /* FR_NO_PRINT */ + +/*======================================================= + * Square root and hypot + * + * fr_isqrt64 is a private helper implementing the digit-by-digit + * ("shift-and-subtract") integer square root. The core loop computes + * floor(sqrt(n)), then a final remainder check rounds to nearest. + * Uses no division. At most 32 iterations. + * The rounded root is returned as u32, so callers must keep sqrt(n) + * below 2^32. + */ +static u32 fr_isqrt64(uint64_t n) +{ + uint64_t root = 0; + uint64_t bit = (uint64_t)1 << 62; + while (bit > n) bit >>= 2; + while (bit != 0) + { + uint64_t trial = root + bit; + if (n >= trial) + { + n -= trial; + root = (root >> 1) + bit; + } + else + { + root >>= 1; + } + bit >>= 2; + } + /* round to nearest: if remainder > root, (root+1)^2 is closer */ + if (n > root) + root++; + return (u32)root; +} + +/*======================================================= + * FR_sqrt - fixed-radix square root. + * + * input : value at radix `radix`. Must be >= 0. + * radix : fractional bits of input AND result. + * return : sqrt(input) at radix `radix`, or FR_DOMAIN_ERROR if input < 0.
+ * + * Math: sqrt(input_fp / 2^r) at radix r is + * result_fp = sqrt(input_fp / 2^r) * 2^r = sqrt(input_fp * 2^r) + * so we compute isqrt(input_fp << radix) on a 64-bit accumulator. This + * works for any input that fits in s32 and any radix in [0, 30]. + * + * Precision: round-to-nearest sqrt. Worst-case absolute error is + * <= 0.5 LSB at the requested radix. + * Always non-negative for non-negative input. Result is monotone in + * input. + * + * Saturation: input < 0 returns FR_DOMAIN_ERROR (= INT32_MIN). Caller + * can test `result == FR_DOMAIN_ERROR` to detect domain errors. + * + * Side effects: none. Pure function. + */ +s32 FR_sqrt(s32 input, u16 radix) +{ + uint64_t n; + + if (input < 0) + return FR_DOMAIN_ERROR; + if (input == 0) + return 0; + + n = (uint64_t)(u32)input << radix; + return (s32)fr_isqrt64(n); +} + +/*======================================================= + * FR_hypot - sqrt(x*x + y*y) without intermediate overflow. + * + * x, y : values at radix `radix` + * radix : fractional bits of inputs AND result + * return : sqrt(x*x + y*y) at radix `radix`, saturated at INT32_MAX. + * + * Math: x*x + y*y is naturally at radix 2*radix; isqrt of a 2r-radix + * value yields an r-radix result, so no extra shifting is needed. The + * u64 accumulator can hold (INT32_MAX^2)*2 = ~2^63, so (x*x + y*y) never + * overflows for any s32 inputs. + * + * Saturation: the magnitude itself can reach ~sqrt(2) * 2^31 when both + * inputs are large, which does not fit in s32. Such results are clamped + * to INT32_MAX (0x7FFFFFFF) instead of wrapping to a negative value. + * + * Precision: round-to-nearest. Worst-case absolute error <= 0.5 LSB + * at the requested radix (when not saturated). + * + * Side effects: none. Pure function. + */ +s32 FR_hypot(s32 x, s32 y, u16 radix) +{ + uint64_t xx = (uint64_t)((int64_t)x * (int64_t)x); + uint64_t yy = (uint64_t)((int64_t)y * (int64_t)y); + u32 root; + (void)radix; /* the 2*radix in xx+yy cancels with isqrt's halving */ + root = fr_isqrt64(xx + yy); + /* root may exceed INT32_MAX; clamp rather than let the s32 cast wrap. */ + return (root > (u32)0x7FFFFFFF) ? (s32)0x7FFFFFFF : (s32)root; +} + +/*======================================================= + * FR_hypot_fast8 — 8-segment piecewise-linear magnitude approximation. + * + * Shift-only, no multiply, no 64-bit.
Based on the piecewise-linear + * method described in US Patent 6,567,777 B1 (Chatterjee, expired). + * Peak error: ~0.10%. + * + * a*hi + b*lo can reach ~1.414 * 2^31 when both inputs are near full scale, + * so the sums are formed in u32 (no signed overflow) and the result + * saturates at INT32_MAX. + */ +s32 FR_hypot_fast8(s32 x, s32 y) +{ + u32 hi, lo, r; + + /* absolute values (clamp INT32_MIN to INT32_MAX to avoid UB) */ + if (x < 0) x = (x == (s32)0x80000000) ? 0x7FFFFFFF : -x; + if (y < 0) y = (y == (s32)0x80000000) ? 0x7FFFFFFF : -y; + + /* hi = max(|x|,|y|), lo = min(|x|,|y|) */ + if (x > y) { hi = (u32)x; lo = (u32)y; } + else { hi = (u32)y; lo = (u32)x; } + + if (hi == 0) return 0; + + /* 8 piecewise-linear segments: dist ≈ a*hi + b*lo. + * Boundaries at β = 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875. */ + if ((hi >> 1) < lo) { + /* β in (0.5, 1.0] */ + if (lo > hi - (hi >> 2)) { + /* β in (0.75, 1.0] */ + if (lo > hi - (hi >> 3)) /* β > 0.875 */ + /* a≈0.7305, b≈0.6836 */ + r = hi - (hi >> 2) - (hi >> 6) - (hi >> 8) + + lo - (lo >> 2) - (lo >> 4) - (lo >> 8); + else /* β in (0.75, 0.875] */ + /* a≈0.7803, b≈0.6262 */ + r = hi - (hi >> 2) + (hi >> 5) - (hi >> 10) + + (lo >> 1) + (lo >> 3) + (lo >> 10) + (lo >> 12); + } else { + /* β in (0.5, 0.75] */ + if (lo > hi - (hi >> 1) + (hi >> 3)) /* β > 0.625 */ + /* a≈0.8281, b≈0.5630 */ + r = hi - (hi >> 2) + (hi >> 4) + (hi >> 6) + + (lo >> 1) + (lo >> 4) + (lo >> 11); + else /* β in (0.5, 0.625] */ + /* a≈0.8728, b≈0.4893 */ + r = hi - (hi >> 3) - (hi >> 9) - (hi >> 12) + + (lo >> 1) - (lo >> 6) + (lo >> 8) + (lo >> 10); + } + } else { + /* β in [0, 0.5] */ + if ((hi >> 2) < lo) { + /* β in (0.25, 0.5] */ + if ((hi >> 1) - (hi >> 3) < lo) /* β > 0.375 */ + /* a≈0.9180, b≈0.3984 */ + r = hi - (hi >> 4) - (hi >> 6) - (hi >> 8) + + (lo >> 1) - (lo >> 3) + (lo >> 5) - (lo >> 7); + else /* β in (0.25, 0.375] */ + /* a≈0.9551, b≈0.2988 */ + r = hi - (hi >> 4) + (hi >> 6) + (hi >> 9) + + (lo >> 2) + (lo >> 4) - (lo >> 6) + (lo >> 9); + } else { + /* β in [0, 0.25] */ + if ((hi >> 3) < lo) /* β in (0.125, 0.25] */ + /* a≈0.9839, b≈0.1838 */ + r = hi - (hi >> 6) - (hi >> 11) + + (lo >> 2) - 
(lo >> 4) - (lo >> 8) + (lo >> 12); + else /* β in [0, 0.125] */ + /* a≈0.9990, b≈0.0620 */ + r = hi - (hi >> 10) + + (lo >> 4) - (lo >> 11); + } + } + + /* The estimate can exceed INT32_MAX; saturate rather than wrap negative. */ + return (r > (u32)0x7FFFFFFF) ? (s32)0x7FFFFFFF : (s32)r; +} + +#ifndef FR_NO_WAVES +/*======================================================= + * Wave generators — synth-style fixed-shape waveforms. + * + * All wave functions take a u16 BAM phase in [0, 65535] (a full cycle) + * and return s16 in s0.15 format, clamped to [-32767, +32767] to match + * the trig amplitude convention used by fr_cos_bam / fr_sin_bam. + * + * Use FR_HZ2BAM_INC(hz, sample_rate) to compute a phase increment for + * a given output frequency, then accumulate it (mod 2^16) per sample. + * + * Side effects: pure functions (except fr_wave_noise which advances a + * caller-provided LFSR state pointer). + */ + +/* fr_wave_sqr - 50%-duty square wave. + * phase < pi (BAM<0x8000) → +full; phase >= pi → -full. + */ +s16 fr_wave_sqr(u16 phase) +{ + return (phase < 0x8000) ? (s16)32767 : (s16)-32767; +} + +/* fr_wave_pwm - variable-duty pulse. + * `duty` is the BAM threshold: phase < duty → high, else low. + * duty = 0 → always low + * duty = 0x8000 → 50% duty (same as fr_wave_sqr) + * duty = 0xffff → high almost everywhere (one BAM step low) + */ +s16 fr_wave_pwm(u16 phase, u16 duty) +{ + return (phase < duty) ? (s16)32767 : (s16)-32767; +} + +/* fr_wave_saw - rising sawtooth. + * Linear ramp from -32767 (just after phase=0) to +32767 (at phase=0xffff), + * passing through 0 at phase=0x8000. The single boundary case phase=0 + * (which would naturally produce -32768) is clamped to -32767 to keep the + * amplitude symmetric. + */ +s16 fr_wave_saw(u16 phase) +{ + s32 v = (s32)phase - (s32)0x8000; + if (v < -32767) v = -32767; + return (s16)v; +} + +/* fr_wave_tri - symmetric triangle.
+ * Four linear segments: + * Q1 [0, 0x4000) : rising 0 → +peak + * Q2 [0x4000, 0x8000): falling +peak → 0 + * Q3 [0x8000, 0xc000): falling 0 → -peak + * Q4 [0xc000, 0x10000): rising -peak → 0 + * Peaks are clamped to +/-32767 (the natural unclamped formula gives + * +/-32768 at the exact peak BAM). + */ +s16 fr_wave_tri(u16 phase) +{ + s32 t; + if (phase < 0x8000) + { + /* First half: 0 -> +peak -> 0 */ + if (phase < 0x4000) + t = (s32)phase << 1; /* 0 .. 0x7ffe */ + else + t = (s32)(0x8000 - phase) << 1; /* 0x8000 .. 2 */ + if (t > 32767) t = 32767; + return (s16)t; + } + else + { + /* Second half: 0 -> -peak -> 0 */ + if (phase < 0xc000) + t = (s32)(phase - 0x8000) << 1; /* 0 .. 0x7ffe */ + else + t = (s32)(0x10000 - phase) << 1;/* 0x8000 .. 2 */ + if (t > 32767) t = 32767; + return (s16)-t; + } +} + +/* fr_wave_tri_morph - variable-symmetry triangle. + * + * phase : u16 BAM + * break_point : u16 BAM where the wave reaches its positive peak. + * + * Going from 0 to +peak in [0, break_point), then from +peak back to 0 + * in [break_point, 0xffff]. The result is a triangle whose rising and + * falling slopes can differ. + * + * break_point = 0x8000 → symmetric triangle + * break_point = 0xffff → rising sawtooth (instant fall) + * break_point = 0x0001 → falling sawtooth (instant rise) + * break_point = 0 → degenerate; treated as 1 to avoid div-by-zero + * + * Note that this version returns values in [0, 32767] only (not bipolar). + * Caller can subtract 16384 and double if a bipolar version is desired + * (that maps the output onto [-32768, 32766]). + * + * Costs: one 32-bit divide per sample. On Cortex-M3+ this is ~10-20 + * cycles. On 8051 / MSP430 this is much slower; pre-compute slopes if + * those targets matter to you.
+ */ +s16 fr_wave_tri_morph(u16 phase, u16 break_point) +{ + u32 t; + if (break_point == 0) + break_point = 1; + if (phase < break_point) + { + /* rising: 0 at phase=0, 32767 at phase=break_point */ + t = (u32)(((u32)phase * 32767UL) / (u32)break_point); + } + else + { + /* falling: 32767 at phase=break_point, 0 at phase=0xffff */ + u32 span = (u32)0xffff - (u32)break_point; + if (span == 0) + return 32767; + t = (u32)(((u32)((u32)0xffff - (u32)phase) * 32767UL) / span); + } + if (t > 32767) t = 32767; + return (s16)t; +} + +/* fr_wave_noise - LFSR-based pseudorandom noise. + * + * state : pointer to a u32 the caller maintains. Initial value must + * be non-zero (zero is a fixed point of the LFSR: the state + * never changes and every call returns -32767). A common + * seed is 0xACE1u or any other non-zero constant. + * + * Returns the next s16 sample in s0.15 (full ±32767 range, white-ish), + * or 0 if `state` is NULL. + * Implementation: 32-bit Galois LFSR with the standard maximal-period + * tap polynomial 0xD0000001 (period 2^32 - 1 samples). + * + * Quality: this is "fast white noise" suitable for synth use. It is NOT + * cryptographically secure. For better statistical properties (FFT + * flatness etc.) layer a longer LFSR or use a separate PRNG. + * NOTE(review): the state shifts by one bit per call and the output is its + * top 16 bits, so consecutive samples share most of their bits - confirm + * that correlation is acceptable for the intended use. + */ +s16 fr_wave_noise(u32 *state) +{ + u32 lsb; + if (!state) + return 0; + lsb = *state & 1u; + *state >>= 1; + if (lsb) + *state ^= 0xD0000001u; + /* Take the top 16 bits and re-bias to s16 range, clamp to ±32767. */ + { + s32 v = (s32)((*state >> 16) & 0xffffu) - 32768; + if (v < -32767) v = -32767; + return (s16)v; + } +} + +/*======================================================= + * ADSR envelope generator + * + * Linear-segment Attack-Decay-Sustain-Release envelope. State is held + * in caller-allocated fr_adsr_t struct (no global state, no malloc). + * + * Lifecycle: + * 1. Caller allocates an fr_adsr_t (stack or static). + * 2. fr_adsr_init() once per patch with attack/decay/release durations + * in samples and a sustain level in s0.15. + * 3. fr_adsr_trigger() on note-on.
Output rises 0 -> peak over `atk` + * samples, falls peak -> sustain over `dec` samples, then holds. + * 4. fr_adsr_release() on note-off. Output falls current -> 0 over a + * time controlled by the release rate (rate, not duration: the + * time depends on where in the envelope we are). + * 5. fr_adsr_step() once per audio sample to read the current value. + * + * Internal precision: levels are stored as s32 in s1.30 format so even + * very long envelopes (e.g. 48000-sample attack at 48 kHz = 1 second) + * have a non-zero per-sample increment. Output is converted to s0.15. + * Rates that would still round down to 0 are floored at 1 LSB by + * fr_adsr_init() so that every stage eventually completes. + * + * Saturation: the envelope state machine is self-clamping; level cannot + * escape [0, 1<<30]. Output is in [0, 32767]. + */ + +#define FR_ADSR_PEAK_S130 ((s32)1 << 30) + +/* fr_adsr_init - configure an envelope and put it in the IDLE state. + * + * env : caller-allocated state; NULL is ignored. + * attack_samples : samples to rise 0 -> peak (0 = reach peak in one step). + * decay_samples : samples to fall peak -> sustain (0 = one step). + * sustain_level_s015 : sustain level in s0.15; negative values are clamped to 0. + * release_samples : samples to fall peak -> 0 at the release rate (0 = one step). + */ +void fr_adsr_init(fr_adsr_t *env, + u32 attack_samples, + u32 decay_samples, + s16 sustain_level_s015, + u32 release_samples) +{ + if (!env) + return; + env->state = FR_ADSR_IDLE; + env->level = 0; + + /* sustain_level_s015 is s16 so its upper bound (32767) is already the + * type's max; only the lower bound needs an explicit clamp. */ + if (sustain_level_s015 < 0) + sustain_level_s015 = 0; + /* Convert s0.15 -> s1.30 by shifting left 15. */ + env->sustain = (s32)sustain_level_s015 << 15; + + /* All three divisions are done in u32 so that a sample count >= 2^31 + * cannot turn into a negative divisor. */ + env->attack_inc = (attack_samples > 0) + ? (s32)((u32)FR_ADSR_PEAK_S130 / attack_samples) + : FR_ADSR_PEAK_S130; + env->decay_dec = (decay_samples > 0) + ? (s32)((u32)(FR_ADSR_PEAK_S130 - env->sustain) / decay_samples) + : (FR_ADSR_PEAK_S130 - env->sustain); + env->release_dec = (release_samples > 0) + ? 
(s32)((u32)FR_ADSR_PEAK_S130 / release_samples) + : FR_ADSR_PEAK_S130; + + /* A per-sample step of 0 (sample count larger than the span being + * divided) would leave fr_adsr_step() in that stage forever; floor + * each rate at 1 LSB so every stage terminates. */ + if (env->attack_inc < 1) + env->attack_inc = 1; + if (env->decay_dec < 1) + env->decay_dec = 1; + if (env->release_dec < 1) + env->release_dec = 1; +} + +/* fr_adsr_trigger - note-on: restart from level 0 in the ATTACK stage. NULL is ignored. */ +void fr_adsr_trigger(fr_adsr_t *env) +{ + if (!env) + return; + env->state = FR_ADSR_ATTACK; + env->level = 0; +} + +/* fr_adsr_release - note-off: switch to the RELEASE stage from the current level. NULL is ignored. */ +void fr_adsr_release(fr_adsr_t *env) +{ + if (!env) + return; + env->state = FR_ADSR_RELEASE; +} + +/* fr_adsr_step - advance the envelope by one sample and return its level + * in s0.15 ([0, 32767]); returns 0 if `env` is NULL. */ +s16 fr_adsr_step(fr_adsr_t *env) +{ + if (!env) + return 0; + switch (env->state) + { + case FR_ADSR_ATTACK: + env->level += env->attack_inc; + if (env->level >= FR_ADSR_PEAK_S130) + { + env->level = FR_ADSR_PEAK_S130; + env->state = FR_ADSR_DECAY; + } + break; + case FR_ADSR_DECAY: + env->level -= env->decay_dec; + if (env->level <= env->sustain) + { + env->level = env->sustain; + env->state = FR_ADSR_SUSTAIN; + } + break; + case FR_ADSR_SUSTAIN: + env->level = env->sustain; + break; + case FR_ADSR_RELEASE: + env->level -= env->release_dec; + if (env->level <= 0) + { + env->level = 0; + env->state = FR_ADSR_IDLE; + } + break; + case FR_ADSR_IDLE: + default: + env->level = 0; + break; + } + /* s1.30 -> s0.15: shift right 15. Clamp for safety. */ + { + s32 out = env->level >> 15; + if (out < 0) out = 0; + if (out > 32767) out = 32767; + return (s16)out; + } +} +#endif /* FR_NO_WAVES */ diff --git a/docs/README.md b/docs/README.md index dad53da..c7a900a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -45,30 +45,30 @@ radix — Q16.16 is just the reference point for the table. See the [TDD report](../build/test_tdd_report.md) for sweeps at radixes 8, 12, 16, and 24.
- -| Function | Max err (%)*| Avg err (%) | Note | -|---|---:|---:|---| -| sin/cos (BAM) | 0.4578 | 0.0076 | fr_sin_bam/fr_cos_bam direct; 129-entry table | -| sin/cos (deg) | 0.4578 | 0.0076 | FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM | -| sin/cos (rad) | 0.6104 | 0.0085 | fr_sin/fr_cos via fr_rad_to_bam ±2π r16 | -| tan (BAM) | 0.5823 | 0.0008 | fr_tan_bam 65536-pt full; ±maxint at poles | -| tan (deg) | 0.5311 | 0.0008 | FR_Tan ±360° s15.16 full; sat at poles | -| tan (rad) | 13.4069 | 0.0029 | fr_tan ±2π r16 full; sat at poles | -| asin / acos | 0.8743 | 0.0301 | 65536-pt; sqrt approx near boundary | -| atan2 | 0.5100 | 0.0237 | 65536x5 radii; asin/acos+hypot_fast8 | -| atan | 0.3390 | 0.0154 | 20001-pt full sweep [-10,10]; via FR_atan2 | -| sqrt | 0.0239 | 0.0000 | Round-to-nearest | -| log2 | 0.0286 | 0.0029 | 65-entry mantissa table | -| pow2 | 0.0019 | 0.0003 | 65-entry fraction table | -| ln, log10 | 0.0004 | 0.0000 | Via FR_MULK28 from log2 | -| exp | 0.0003 | 0.0000 | FR_MULK28 + FR_pow2 | -| exp_fast | 0.0009 | 0.0001 | Shift-only scaling | -| pow10 | 0.0007 | 0.0000 | FR_MULK28 + FR_pow2 | -| pow10_fast | 0.0028 | 0.0002 | Shift-only scaling | -| hypot (exact) | 0.0000 | 0.0000 | 64-bit intermediate | -| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply | - -*Relative error; reference clamped to 1% of full-scale output. 
+ +| Function | Max err (%)*| Avg err (%) | Note | +|---|---:|---:|---| +| sin/cos (BAM) | 0.1526 | 0.0030 | fr_sin_bam/fr_cos_bam direct; 129-entry table | +| sin/cos (deg) | 0.1526 | 0.0029 | FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM | +| sin/cos (rad) | 0.1828 | 0.0033 | fr_sin/fr_cos via fr_rad_to_bam ±2π r16 | +| tan (BAM) | 0.5823 | 0.0008 | fr_tan_bam 65536-pt full; ±maxint at poles | +| tan (deg) | 0.5311 | 0.0008 | fr_tan_deg ±360° s15.16 full; sat at poles | +| tan (rad) | 0.0386 | 0.0001 | fr_tan ±2π r16; r24 pole bypass | +| asin / acos | 0.7771 | 0.0280 | 65536-pt; sqrt approx near boundary | +| atan2 | 0.2564 | 0.0237 | 65536x5 radii; asin/acos+hypot_fast8 | +| atan | 0.2425 | 0.0155 | 20001-pt full sweep [-10,10]; via FR_atan2 | +| sqrt | 0.0000 | 0.0000 | Round-to-nearest | +| log2 | 0.0116 | 0.0016 | 65-entry mantissa table | +| pow2 | 0.0018 | 0.0004 | 65-entry fraction table | +| ln, log10 | 0.0004 | 0.0000 | Via FR_MULK28 from log2 | +| exp | 0.0003 | 0.0000 | FR_MULK28 + FR_pow2 | +| exp_fast | 0.0009 | 0.0001 | Shift-only scaling | +| pow10 | 0.0005 | 0.0000 | FR_MULK28 + FR_pow2 | +| pow10_fast | 0.0022 | 0.0002 | Shift-only scaling | +| hypot (exact) | 0.0000 | 0.0000 | 64-bit intermediate | +| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply | + +*Relative error; reference clamped to 1% of full-scale output. ## What's in the box @@ -221,14 +221,14 @@ understand *how* the radix notation works first. | Multiply-free option | No | No | Yes (e.g. `FR_EXP_FAST`, `FR_hypot_fast8`) | | Wave generators | No | No | 6 shapes + ADSR | | Dependencies | None | ARM only | None | -| Code size (Cortex-M0, -Os) | 2.4 KB | ~40 KB+ | 4.2 KB | +| Code size (Cortex-M0, -Os) | 2.4 KB | ~40 KB+ | 3.4 KB lean / 5.7 KB full | Sizes measured with `arm-none-eabi-gcc -mcpu=cortex-m0 -mthumb -Os`. libfixmath covers trig/sqrt/exp in Q16.16 only; FR_Math includes log/ln/log10, wave generators, ADSR, print helpers, and variable radix. 
CMSIS-DSP estimate is for the math function subset only. -See [`docker/build_sizes.sh`](../docker/build_sizes.sh) for the build -script. +See [`scripts/crossbuild-docker.sh`](../scripts/crossbuild-docker.sh) for +the build script. ## History diff --git a/docs/building.md b/docs/building.md index 4a17c1a..fea0142 100644 --- a/docs/building.md +++ b/docs/building.md @@ -160,39 +160,51 @@ you do *not* need `libm`. ### Code size (.text section, compiled with `-Os`) -Sizes are for `FR_math.c` compiled with `-Os -ffreestanding`. -Core = compiled with `-DFR_CORE_ONLY` (math only, no print, no waves). +Sizes are for `FR_math.c` compiled with `-Os`. +Lean = `-DFR_LEAN -DFR_NO_PRINT` (radian trig, inv trig, log/exp, sqrt). +Core = `-DFR_CORE_ONLY` (+ degree trig, BAM tan, log10, hypot). +Full = all features (+ print, waves, ADSR). With `-ffunction-sections` and linker `--gc-sections`, only the functions your application references are linked, so real flash usage will be smaller. -| Target | Core | Full | -|--------|-----:|-----:| -| RP2040 (Cortex-M0+) | 2.6 KB | 4.2 KB | -| STM32 (Cortex-M4) | 2.6 KB | 4.2 KB | -| RISC-V 32 (rv32imac) | 3.0 KB | 4.7 KB | -| ESP32 (Xtensa) | 3.5 KB | 5.2 KB | -| 68k | 3.5 KB | 5.3 KB | -| x86-64 (GCC) | 3.5 KB | 5.7 KB | -| x86-32 | 4.5 KB | 6.8 KB | -| MSP430 (16-bit) | 5.9 KB | 8.9 KB | -| 68HC11 | 10.8 KB | 16.0 KB | -| AVR (ATmega328P) | 7.0 KB | 10.6 KB | +| Target | Lean | Core | Full | +|--------|-----:|-----:|-----:| +| Cortex-M4 (STM32) | 3.3 KB | 4.4 KB | 5.5 KB | +| Cortex-M0 (RP2040) | 3.4 KB | 4.5 KB | 5.7 KB | +| ARM Thumb | 3.4 KB | 4.7 KB | 5.9 KB | +| RISC-V rv64 | 4.0 KB | 5.5 KB | 6.8 KB | +| RISC-V rv32 | 4.1 KB | 5.5 KB | 6.8 KB | +| Xtensa LX106 (ESP8266) | 4.2 KB | 5.8 KB | 7.3 KB | +| ARM32 | 4.3 KB | 5.8 KB | 7.7 KB | +| 68k | 4.4 KB | 6.2 KB | 7.8 KB | +| x86-64 (GCC) | 4.6 KB | 6.1 KB | 8.0 KB | +| AArch64 (ARM64) | 4.8 KB | 6.6 KB | 8.7 KB | +| x86-32 | 5.3 KB | 7.2 KB | 9.2 KB | +| PowerPC | 5.8 KB | 8.0 KB | 
10.4 KB | +| MSP430 (16-bit) | 7.8 KB | 10.7 KB | 12.8 KB | +| AVR (ATmega328P) | 9.2 KB | 12.8 KB | 15.4 KB | +| 68HC11 | 13.3 KB | 18.4 KB | 22.6 KB | ### Lean build options -Three compile-time `#define` guards let you strip optional subsystems +Compile-time `#define` guards let you strip optional subsystems for ROM-constrained targets. Define them before including `FR_math.h` (or pass `-D` on the compiler command line): | Define | What it removes | Typical savings | |---|---|---| -| `FR_CORE_ONLY` | Everything below (print + waves) | ~1.9 KB | +| `FR_LEAN` | Degree trig, BAM tan, angle converters, `FR_log10`, `FR_hypot`, waves + ADSR | ~3.7 KB | +| `FR_CORE_ONLY` | Print + waves (shorthand for both below) | ~1.9 KB | | `FR_NO_PRINT` | `FR_printNumF`, `FR_printNumD`, `FR_printNumH`, `FR_numstr` | ~1.3 KB | | `FR_NO_WAVES` | `fr_wave_*` (6 shapes), `fr_adsr_*` (ADSR envelope), `FR_HZ2BAM_INC` | ~0.6 KB | +`FR_LEAN` keeps only radian trig (sin, cos, tan), inverse trig, sqrt, +log2, ln, exp, pow2, and arithmetic — comparable to libfixmath's API at +4.7 KB text. `FR_LEAN` implies `FR_NO_WAVES`. + `FR_CORE_ONLY` is a convenience shorthand that defines both `FR_NO_PRINT` and `FR_NO_WAVES` in one step. diff --git a/docs/examples.md b/docs/examples.md index d07a477..f3e0bed 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -1,7 +1,7 @@ # Examples Short, runnable snippets for the most common FR_Math tasks. Each -example compiles cleanly against the v2.0.0 library with: +example compiles cleanly against the v2.0.8 library with: ```bash cc -Isrc example.c src/FR_math.c -o example diff --git a/docs/getting-started.md b/docs/getting-started.md index 9eac417..48028ce 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -13,8 +13,8 @@ manager integration and no install step. 
Either: - Copy `src/FR_math.c`, `src/FR_math.h`, `src/FR_defs.h` (and optionally - `src/FR_math_2D.cpp`, `src/FR_math_2D.h`, - and `src/FR_trig_table.h`) into the target project, **or** + `src/FR_math_2D.cpp`, `src/FR_math_2D.h`) + into the target project, **or** - Add FR_Math as a git submodule and point the build system at `src/`. diff --git a/idf_component.yml b/idf_component.yml index 6a0d030..8097972 100644 --- a/idf_component.yml +++ b/idf_component.yml @@ -1,4 +1,4 @@ -version: "2.0.7" +version: "2.0.8" description: "Compact fixed-point math library for embedded systems. Integer-only with caller-selectable radix. Trig, log/exp, sqrt, hypot, wave generators, ADSR, and 2D transforms. Zero dependencies." url: "https://github.com/deftio/fr_math" repository: "https://github.com/deftio/fr_math.git" diff --git a/library.json b/library.json index 495a89f..17f9649 100644 --- a/library.json +++ b/library.json @@ -1,6 +1,6 @@ { "name": "FR_Math", - "version": "2.0.7", + "version": "2.0.8", "description": "Compact fixed-point math library for embedded systems. Integer-only with caller-selectable radix. Trig, log/exp, sqrt, hypot, wave generators, ADSR, and 2D transforms in 4KB of flash. Zero dependencies.", "keywords": [ "fixed-point", diff --git a/library.properties b/library.properties index cd2d953..47dde32 100644 --- a/library.properties +++ b/library.properties @@ -1,5 +1,5 @@ name=FR_Math -version=2.0.7 +version=2.0.8 author=M. A. Chatterjee maintainer=M. A. Chatterjee sentence=Compact fixed-point math library for embedded systems. 4KB flash, zero dependencies, any radix. diff --git a/llms.txt b/llms.txt index 7c0ce3e..4f13ef1 100644 --- a/llms.txt +++ b/llms.txt @@ -9,7 +9,7 @@ or libraries. Pure C99, zero dependencies beyond ``. 
- Repository: https://github.com/deftio/fr_math - Documentation: https://deftio.github.io/fr_math/ - License: BSD-2-Clause -- Version: 2.0.7 +- Version: 2.0.8 ## Key concept: radix parameter @@ -25,9 +25,8 @@ Common radix choices: ## Source files - `src/FR_math.h` — all public declarations, macros, constants -- `src/FR_math.c` — all function implementations (~42KB) +- `src/FR_math.c` — all function implementations (trig tables inlined, ~45KB) - `src/FR_defs.h` — type aliases: s8, s16, s32, u8, u16, u32 -- `src/FR_trig_table.h` — precomputed cosine quadrant table (129 entries) + tangent octant table (65 entries) - `src/FR_math_2D.h` / `src/FR_math_2D.cpp` — optional 2D transform class (C++) ## Types diff --git a/makefile b/makefile index 83a4e64..c3f0c74 100644 --- a/makefile +++ b/makefile @@ -54,7 +54,8 @@ help: @echo " coverage Generate coverage report (gcov)" @echo " coverage-basic Basic coverage info without lcov" @echo " coverage-html HTML coverage report (requires lcov)" - @echo " size-report Multi-architecture size report" + @echo " size-report Multi-architecture size report (Docker)" + @echo " size-update Size report + patch doc files" @echo " size-simple Size report for current platform" @echo "" @echo "Tools:" @@ -196,10 +197,15 @@ coverage-html: clean dirs @echo "HTML report: $(COV_DIR)/html/index.html" @genhtml $(COV_DIR)/coverage.info --output-directory $(COV_DIR)/html -# Size report - multi-architecture +# Size report - multi-architecture (Docker cross-compilation) .PHONY: size-report size-report: dirs - @scripts/size_report.sh + @scripts/crossbuild_sizes.sh + +# Size report + patch doc files +.PHONY: size-update +size-update: dirs + @scripts/crossbuild_sizes.sh --update # Simple size report for current platform .PHONY: size-simple @@ -215,6 +221,33 @@ size-simple: lib ls -lh $(BUILD_DIR)/*.o; \ fi +# Lean build: only functions with libfixmath equivalents (radian trig, +# inverse trig, sqrt, log2, ln, exp, mul/div — no degree trig, no BAM +# tan, no 
waves, no hypot exact, no log10). +.PHONY: size-lean +size-lean: dirs + @echo "=== LEAN Build (FR_LEAN — libfixmath-equivalent API only) ===" + @$(CC) -I$(SRC_DIR) $(LIB_WARN) -DFR_LEAN -DFR_NO_PRINT -Os -c $(SRC_DIR)/FR_math.c -o $(BUILD_DIR)/FR_math_lean.o + @size $(BUILD_DIR)/FR_math_lean.o + @echo "" + +# Full build: everything (default — all trig, waves, ADSR, print, etc.) +.PHONY: size-full +size-full: dirs + @echo "=== FULL Build (all features) ===" + @$(CC) -I$(SRC_DIR) $(LIB_WARN) -Os -c $(SRC_DIR)/FR_math.c -o $(BUILD_DIR)/FR_math_full.o + @size $(BUILD_DIR)/FR_math_full.o + @echo "" + +# Side-by-side lean vs full size comparison +.PHONY: size-compare +size-compare: size-lean size-full + @echo "=== Lean vs Full Comparison ===" + @LEAN=$$(size $(BUILD_DIR)/FR_math_lean.o | tail -1 | awk '{print $$1}'); \ + FULL=$$(size $(BUILD_DIR)/FR_math_full.o | tail -1 | awk '{print $$1}'); \ + echo " Lean text: $${LEAN} bytes"; \ + echo " Full text: $${FULL} bytes" + # Tools TOOLS_DIR = tools @@ -232,7 +265,7 @@ $(BUILD_DIR)/trig_neighborhood: $(TOOLS_DIR)/trig_neighborhood.cpp $(SRC_DIR)/FR .PHONY: clean clean: rm -rf $(BUILD_DIR) $(COV_DIR) - rm -f *.o *.gcda *.gcno *.exe *.info + rm -f *.o *.gcda *.gcno *.gcov *.exe *.info .PHONY: cleanall cleanall: clean @@ -250,7 +283,7 @@ coverage-basic: clean dirs @echo "" @echo "=== Basic Coverage Info ===" @if command -v gcov >/dev/null 2>&1; then \ - gcov $(SRC_DIR)/FR_math.c -o $(BUILD_DIR) | grep -E "File|Lines executed"; \ + cd $(BUILD_DIR) && gcov FR_math.o | grep -E "File|Lines executed"; \ echo ""; \ echo "For detailed coverage report, install lcov and run: make coverage"; \ else \ diff --git a/pages/assets/site.js b/pages/assets/site.js index a686d8e..d4d0ecc 100644 --- a/pages/assets/site.js +++ b/pages/assets/site.js @@ -16,7 +16,7 @@ ════════════════════════════════════════════════════════════════════ */ (function () { - var FR_VERSION = 'v2.0.7'; + var FR_VERSION = 'v2.0.8'; // Detect whether we're a top-level 
page or inside guide/. // Works for both file:// and http(s):// because we look for the diff --git a/pages/guide/building.html b/pages/guide/building.html index 2ea77bc..e4c4944 100644 --- a/pages/guide/building.html +++ b/pages/guide/building.html @@ -182,45 +182,57 @@

    Cross-compilation

    Code size (.text section, compiled with -Os)

    -

    Sizes are for FR_math.c compiled with -Os -ffreestanding. -Core = compiled with -DFR_CORE_ONLY (math only, no print, no waves). +

    Sizes are for FR_math.c compiled with -Os. +Lean = -DFR_LEAN -DFR_NO_PRINT (radian trig, inv trig, log/exp, sqrt). +Core = -DFR_CORE_ONLY (+ degree trig, BAM tan, log10, hypot). +Full = all features (+ print, waves, ADSR). With -ffunction-sections and linker --gc-sections, only the functions your application references are linked, so real flash usage will be smaller.

    - + - - - - - - - - - - + + + + + + + + + + + + + + +
    Target | Core | Full
    Target | Lean | Core | Full
    RP2040 (Cortex-M0+) | 2.6 KB | 4.2 KB
    STM32 (Cortex-M4) | 2.6 KB | 4.2 KB
    RISC-V 32 (rv32imac) | 3.0 KB | 4.7 KB
    ESP32 (Xtensa) | 3.5 KB | 5.2 KB
    68k | 3.5 KB | 5.3 KB
    x86-64 (GCC) | 3.5 KB | 5.7 KB
    x86-32 | 4.5 KB | 6.8 KB
    MSP430 (16-bit) | 5.9 KB | 8.9 KB
    68HC11 | 10.8 KB | 16.0 KB
    AVR (ATmega328P) | 7.0 KB | 10.6 KB
    Cortex-M4 (STM32) | 3.3 KB | 4.4 KB | 5.5 KB
    Cortex-M0 (RP2040) | 3.4 KB | 4.5 KB | 5.7 KB
    ARM Thumb | 3.4 KB | 4.7 KB | 5.9 KB
    RISC-V rv64 | 4.0 KB | 5.5 KB | 6.8 KB
    RISC-V rv32 | 4.1 KB | 5.5 KB | 6.8 KB
    Xtensa LX106 (ESP8266) | 4.2 KB | 5.8 KB | 7.3 KB
    ARM32 | 4.3 KB | 5.8 KB | 7.7 KB
    68k | 4.4 KB | 6.2 KB | 7.8 KB
    x86-64 (GCC) | 4.6 KB | 6.1 KB | 8.0 KB
    AArch64 (ARM64) | 4.8 KB | 6.6 KB | 8.7 KB
    x86-32 | 5.3 KB | 7.2 KB | 9.2 KB
    PowerPC | 5.8 KB | 8.0 KB | 10.4 KB
    MSP430 (16-bit) | 7.8 KB | 10.7 KB | 12.8 KB
    AVR (ATmega328P) | 9.2 KB | 12.8 KB | 15.4 KB
    68HC11 | 13.3 KB | 18.4 KB | 22.6 KB

    Lean build options

    -

    Three compile-time #define guards let you strip optional subsystems +

    Compile-time #define guards let you strip optional subsystems for ROM-constrained targets. Define them before including FR_math.h (or pass -D on the compiler command line):

    - + +
    DefineWhat it removesTypical savings
    FR_CORE_ONLYEverything below (print + waves)~1.9 KB
    FR_LEANDegree trig, BAM tan, angle converters, FR_log10, FR_hypot, waves + ADSR~3.7 KB
    FR_CORE_ONLYPrint + waves (shorthand for both below)~1.9 KB
    FR_NO_PRINTFR_printNumF, FR_printNumD, FR_printNumH, FR_numstr~1.3 KB
    FR_NO_WAVESfr_wave_* (6 shapes), fr_adsr_* (ADSR envelope), FR_HZ2BAM_INC~0.6 KB
    +

    FR_LEAN keeps only radian trig (sin, cos, tan), inverse trig, sqrt, +log2, ln, exp, pow2, and arithmetic — comparable to libfixmath’s API at +4.7 KB text. FR_LEAN implies FR_NO_WAVES.

    +

    FR_CORE_ONLY is a convenience shorthand that defines both FR_NO_PRINT and FR_NO_WAVES in one step.

    diff --git a/pages/guide/examples.html b/pages/guide/examples.html index 71882f9..be8d55c 100644 --- a/pages/guide/examples.html +++ b/pages/guide/examples.html @@ -18,7 +18,7 @@

    Examples

    Short, runnable snippets for the most common FR_Math tasks. Each -example compiles cleanly against the v2.0.0 library with:

    +example compiles cleanly against the v2.0.8 library with:

    cc -Isrc example.c src/FR_math.c -o example
     ./example
    diff --git a/pages/guide/getting-started.html b/pages/guide/getting-started.html index ee8f5df..d5d300d 100644 --- a/pages/guide/getting-started.html +++ b/pages/guide/getting-started.html @@ -31,8 +31,8 @@

    Install

    • Copy src/FR_math.c, src/FR_math.h, src/FR_defs.h (and optionally - src/FR_math_2D.cpp, src/FR_math_2D.h, - and src/FR_trig_table.h) into the target project, or
    • + src/FR_math_2D.cpp, src/FR_math_2D.h) + into the target project, or
    • Add FR_Math as a git submodule and point the build system at src/.
    diff --git a/pages/index.html b/pages/index.html index 0040cd4..d8635c6 100644 --- a/pages/index.html +++ b/pages/index.html @@ -49,32 +49,32 @@

    Measured accuracy

    See the TDD report for sweeps at radixes 8, 12, 16, and 24.

    - - - - - - - - - - - - - - - - - - - - - - - - -
    FunctionMax err (%)*Avg err (%)Note
    sin/cos (BAM)0.45780.0076fr_sin_bam/fr_cos_bam direct; 129-entry table
    sin/cos (deg)0.45780.0076FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM
    sin/cos (rad)0.61040.0085fr_sin/fr_cos via fr_rad_to_bam ±2π r16
    tan (BAM)0.58230.0008fr_tan_bam 65536-pt full; ±maxint at poles
    tan (deg)0.53110.0008FR_Tan ±360° s15.16 full; sat at poles
    tan (rad)13.40690.0029fr_tan ±2π r16 full; sat at poles
    asin / acos0.87430.030165536-pt; sqrt approx near boundary
    atan20.51000.023765536x5 radii; asin/acos+hypot_fast8
    atan0.33900.015420001-pt full sweep [-10,10]; via FR_atan2
    sqrt0.02390.0000Round-to-nearest
    log20.02860.002965-entry mantissa table
    pow20.00190.000365-entry fraction table
    ln, log100.00040.0000Via FR_MULK28 from log2
    exp0.00030.0000FR_MULK28 + FR_pow2
    exp_fast0.00090.0001Shift-only scaling
    pow100.00070.0000FR_MULK28 + FR_pow2
    pow10_fast0.00280.0002Shift-only scaling
    hypot (exact)0.00000.000064-bit intermediate
    hypot_fast8 (8-seg)0.09150.0320Shift-only, no multiply
    -

    *Relative error; reference clamped to 1% of full-scale output.

    + + + + + + + + + + + + + + + + + + + + + + + + +
    FunctionMax err (%)*Avg err (%)Note
    sin/cos (BAM)0.15260.0030fr_sin_bam/fr_cos_bam direct; 129-entry table
    sin/cos (deg)0.15260.0029FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM
    sin/cos (rad)0.18280.0033fr_sin/fr_cos via fr_rad_to_bam ±2π r16
    tan (BAM)0.58230.0008fr_tan_bam 65536-pt full; ±maxint at poles
    tan (deg)0.53110.0008fr_tan_deg ±360° s15.16 full; sat at poles
    tan (rad)0.03860.0001fr_tan ±2π r16; r24 pole bypass
    asin / acos0.77710.028065536-pt; sqrt approx near boundary
    atan20.25640.023765536x5 radii; asin/acos+hypot_fast8
    atan0.24250.015520001-pt full sweep [-10,10]; via FR_atan2
    sqrt0.00000.0000Round-to-nearest
    log20.01160.001665-entry mantissa table
    pow20.00180.000465-entry fraction table
    ln, log100.00040.0000Via FR_MULK28 from log2
    exp0.00030.0000FR_MULK28 + FR_pow2
    exp_fast0.00090.0001Shift-only scaling
    pow100.00050.0000FR_MULK28 + FR_pow2
    pow10_fast0.00220.0002Shift-only scaling
    hypot (exact)0.00000.000064-bit intermediate
    hypot_fast8 (8-seg)0.09150.0320Shift-only, no multiply
    +

    *Relative error; reference clamped to 1% of full-scale output.

    What’s in the box

    @@ -102,21 +102,24 @@

    What’s in the box

    Lean build options

    -

    Two compile-time #define guards let you strip optional subsystems +

    Compile-time #define guards let you strip optional subsystems for ROM-constrained targets. Define them before including FR_math.h (or pass -D on the compiler command line):

    +
    DefineWhat it removesTypical savings
    FR_LEANDegree trig, BAM tan, angle converters, FR_log10, FR_hypot, waves + ADSR~3.7 KB
    FR_NO_PRINTFR_printNumF, FR_printNumD, FR_printNumH, FR_numstr~1.3 KB
    FR_NO_WAVESfr_wave_* (6 shapes), fr_adsr_* (ADSR envelope), FR_HZ2BAM_INC~0.6 KB
    -

    With both guards enabled the core math library (trig, inverse trig, log/exp, -sqrt, hypot) compiles to ~3.5 KB on x86-64 / clang -Os. On Thumb-2 this -would be roughly 2.6 KB.

    +

    FR_LEAN keeps only radian trig (sin, cos, tan), inverse trig, +sqrt, log2, ln, exp, pow2, and arithmetic — comparable to libfixmath’s +API but at 4.7 KB text vs libfixmath’s 4.9 KB + 112 KB BSS. +With FR_LEAN + FR_NO_PRINT the library compiles to +~4.7 KB on x86-64 / clang -Os.

    /* Example: headless sensor node — math only, no print, no audio */
     #define FR_NO_PRINT
    @@ -237,7 +240,7 @@ 

    Comparison

    Multiply-free optionNoNoYes (e.g. FR_EXP_FAST, FR_hypot_fast8) Wave generatorsNoNo6 shapes + ADSR DependenciesNoneARM onlyNone -Code size (Cortex-M0, -Os)2.4 KB~40 KB+4.2 KB +Code size (Cortex-M0, -Os)2.4 KB~40 KB+3.4 KB lean / 5.7 KB full @@ -246,7 +249,7 @@

    Comparison

    FR_Math includes log/ln/log10, wave generators, ADSR, print helpers, and variable radix. CMSIS-DSP estimate is for the math function subset only. See -docker/build_sizes.sh +scripts/crossbuild_sizes.sh for the build script.

    History

    diff --git a/scripts/crossbuild-docker.sh b/scripts/crossbuild-docker.sh deleted file mode 100755 index 7f10d6d..0000000 --- a/scripts/crossbuild-docker.sh +++ /dev/null @@ -1,123 +0,0 @@ -#!/bin/bash -# crossbuild-docker.sh -- cross-compile FR_math inside Docker container -# Runs inside the xelp-crossbuild Docker image. -# Reports object file and .text section sizes for each target. - -set -e - -SRC=/fr_math/src/FR_math.c -INCLUDE="-I/fr_math/src" -OBJ=/tmp/FR_math.o - -SEP="============================================================" - -# Accumulate summary rows: "label|text_size" -SUMMARY="" - -print_sizes() { - local label="$1" - echo "" - echo "$SEP" - echo "$label" - echo "$SEP" - if [ ! -f "$OBJ" ]; then - echo " (build failed)" - SUMMARY="${SUMMARY}${label}|FAIL\n" - return - fi - OBJ_SIZE=$(stat -c%s "$OBJ" 2>/dev/null || wc -c < "$OBJ") - TEXT_SIZE=$(size "$OBJ" 2>/dev/null | awk 'FNR==2{print $1}') - printf " obj file size: %6s bytes\n" "$OBJ_SIZE" - printf " .text section: %6s bytes\n" "$TEXT_SIZE" - SUMMARY="${SUMMARY}${label}|${TEXT_SIZE}\n" - rm -f "$OBJ" -} - -echo "" -echo "FR_Math cross-compilation size report" -echo "Date: $(date -u '+%Y-%m-%d %H:%M UTC')" -echo "" - -# --- x86 --- -gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true -print_sizes "GCC x86-64" - -clang -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true -print_sizes "Clang x86-64" - -gcc -c $SRC $INCLUDE -Os -m32 -Wall -o $OBJ 2>&1 && true -print_sizes "GCC x86-32" - -tcc -c $SRC $INCLUDE -o $OBJ 2>&1 && true -print_sizes "TCC x86" - -# --- ARM --- -aarch64-linux-gnu-gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true -print_sizes "GCC AArch64 (ARM64)" - -arm-none-eabi-gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true -print_sizes "GCC ARM32" - -arm-none-eabi-gcc -c $SRC $INCLUDE -Os -mthumb -Wall -o $OBJ 2>&1 && true -print_sizes "GCC ARM32 Thumb" - -# --- MSP430 --- -# Bare-metal: no stdint.h in sysroot — use fallback typedefs -NOSTD="-DFR_NO_STDINT" - -msp430-gcc -c $SRC 
$INCLUDE $NOSTD -Os -Wall -o $OBJ 2>&1 && true -print_sizes "GCC MSP430" - -# --- AVR --- -avr-gcc -c $SRC $INCLUDE $NOSTD -Os -mmcu=avr5 -Wall -o $OBJ 2>&1 && true -print_sizes "GCC AVR5 (ATmega328P)" - -avr-gcc -c $SRC $INCLUDE $NOSTD -Os -mmcu=attiny85 -Wall -o $OBJ 2>&1 && true -print_sizes "GCC AVR ATtiny85" - -# --- 68HC11 --- -m68hc11-gcc -c $SRC $INCLUDE $NOSTD -Os -o $OBJ 2>&1 && true -print_sizes "GCC 68HC11" - -# --- 68k (Motorola 68000) --- -m68k-linux-gnu-gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true -print_sizes "GCC m68k" - -# --- PowerPC --- -powerpc-linux-gnu-gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true -print_sizes "GCC PowerPC" - -# --- RISC-V --- -riscv64-linux-gnu-gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true -print_sizes "GCC RISC-V (rv64)" - -riscv64-unknown-elf-gcc -c $SRC $INCLUDE $NOSTD -Os -march=rv32imac -mabi=ilp32 -Wall -o $OBJ 2>&1 && true -print_sizes "GCC RISC-V (rv32)" - -# --- Xtensa (ESP8266/ESP32 family) --- -xtensa-lx106-elf-gcc -c $SRC $INCLUDE $NOSTD -Os -Wall -o $OBJ 2>&1 && true -print_sizes "GCC Xtensa LX106 (ESP8266)" - -# --- Function size table (native GCC) --- -echo "" -echo "$SEP" -echo "Function size table (GCC x86-64)" -echo "$SEP" -gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 -nm $OBJ -n -S --size-sort -f sysv -t d 2>/dev/null | grep -E "FUNC" || true -rm -f $OBJ - -# --- Summary table --- -echo "" -echo "$SEP" -echo "Summary: FR_math.c code size (bytes), compiled with -Os" -echo "$SEP" -printf " %-28s %s\n" "Target" ".text (bytes)" -printf " %-28s %s\n" "----------------------------" "-------------" -echo -e "$SUMMARY" | while IFS='|' read -r label size; do - [ -z "$label" ] && continue - printf " %-28s %s\n" "$label" "$size" -done - -echo "" -echo "Done." 
diff --git a/scripts/crossbuild_sizes.sh b/scripts/crossbuild_sizes.sh new file mode 100755 index 0000000..b32a489 --- /dev/null +++ b/scripts/crossbuild_sizes.sh @@ -0,0 +1,290 @@ +#!/usr/bin/env bash +# +# crossbuild_sizes.sh — cross-compile FR_math inside Docker, generate size +# tables, and optionally patch doc files. +# +# Usage: +# scripts/crossbuild_sizes.sh # build, print table, write CSV + MD +# scripts/crossbuild_sizes.sh --update # also patch doc files +# +# Requires: docker, xelp-crossbuild:latest image +# +# Output files: +# build/sizes.csv — raw CSV (target,lean,core,full) +# build/sizes.md — markdown table +# +# With --update, patches these files between sentinels: +# README.md — markdown table +# docs/building.md — markdown table +# pages/guide/building.html — HTML + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" +cd "${PROJECT_ROOT}" + +MODE="print" +for arg in "$@"; do + case "$arg" in + --update) MODE="update" ;; + -h|--help) + echo "Usage: scripts/crossbuild_sizes.sh [--update]" + echo " (no args) Build in Docker, print size table, write CSV + MD" + echo " --update Also patch README.md, docs/building.md, pages/guide/building.html" + exit 0 + ;; + *) echo "Unknown option: $arg" >&2; exit 1 ;; + esac +done + +# ----------------------------------------------------------------------- +# 1. Preflight checks +# ----------------------------------------------------------------------- + +if ! command -v docker >/dev/null 2>&1; then + echo "ERROR: docker not found. Install Docker first." >&2 + exit 1 +fi + +if ! docker image inspect xelp-crossbuild:latest >/dev/null 2>&1; then + echo "ERROR: Docker image 'xelp-crossbuild:latest' not found." >&2 + echo "Build it with: docker build -t xelp-crossbuild:latest scripts/" >&2 + exit 1 +fi + +mkdir -p build + +# ----------------------------------------------------------------------- +# 2. 
Run cross-compilation inside Docker +# ----------------------------------------------------------------------- + +echo "Running cross-compilation in Docker..." + +docker run --rm -v "${PROJECT_ROOT}:/fr_math" xelp-crossbuild:latest \ + /bin/bash -c ' +set -e + +SRC=/fr_math/src/FR_math.c +INCLUDE="-I/fr_math/src" +OBJ=/tmp/FR_math.o +CSV=/fr_math/build/sizes.csv + +LEAN_DEFS="-DFR_LEAN -DFR_NO_PRINT" +CORE_DEFS="-DFR_CORE_ONLY" +FULL_DEFS="" + +build_text_size() { + local compiler="$1" + local flags="$2" + local defs="$3" + rm -f "$OBJ" + if $compiler -c $SRC $INCLUDE $flags $defs -Os -Wall -o $OBJ 2>/dev/null; then + size "$OBJ" 2>/dev/null | awk "FNR==2{print \$1}" + else + echo "FAIL" + fi + rm -f "$OBJ" +} + +build_target() { + local label="$1" + local compiler="$2" + local flags="$3" + + local lean_sz=$(build_text_size "$compiler" "$flags" "$LEAN_DEFS") + local core_sz=$(build_text_size "$compiler" "$flags" "$CORE_DEFS") + local full_sz=$(build_text_size "$compiler" "$flags" "$FULL_DEFS") + echo "${label},${lean_sz},${core_sz},${full_sz}" >> "$CSV" +} + +# Write CSV header +echo "target,lean,core,full" > "$CSV" + +# --- x86 --- +build_target "GCC x86-64" "gcc" "" +build_target "Clang x86-64" "clang" "" +build_target "GCC x86-32" "gcc" "-m32" +build_target "TCC x86" "tcc" "" + +# --- ARM --- +build_target "GCC AArch64 (ARM64)" "aarch64-linux-gnu-gcc" "" +build_target "GCC ARM32" "arm-none-eabi-gcc" "" +build_target "GCC ARM Thumb" "arm-none-eabi-gcc" "-mthumb" +build_target "Cortex-M0 (RP2040)" "arm-none-eabi-gcc" "-mcpu=cortex-m0 -mthumb" +build_target "Cortex-M4 (STM32)" "arm-none-eabi-gcc" "-mcpu=cortex-m4 -mthumb" + +# --- Bare-metal: no stdint.h in sysroot --- +NOSTD="-DFR_NO_STDINT" + +# --- MSP430 --- +build_target "GCC MSP430" "msp430-gcc" "$NOSTD" + +# --- AVR --- +build_target "AVR ATmega328P" "avr-gcc" "$NOSTD -mmcu=avr5" +build_target "AVR ATtiny85" "avr-gcc" "$NOSTD -mmcu=attiny85" + +# --- 68HC11 --- +build_target "GCC 68HC11" "m68hc11-gcc" 
"$NOSTD" + +# --- 68k --- +build_target "GCC m68k" "m68k-linux-gnu-gcc" "" + +# --- PowerPC --- +build_target "GCC PowerPC" "powerpc-linux-gnu-gcc" "" + +# --- RISC-V --- +build_target "RISC-V rv64" "riscv64-linux-gnu-gcc" "" +build_target "RISC-V rv32" "riscv64-unknown-elf-gcc" "$NOSTD -march=rv32imac -mabi=ilp32" + +# --- Xtensa --- +build_target "Xtensa LX106 (ESP8266)" "xtensa-lx106-elf-gcc" "$NOSTD" + +echo "Docker build complete — $(grep -c , "$CSV") rows written to build/sizes.csv" +' + +# ----------------------------------------------------------------------- +# 3. Generate tables on host +# ----------------------------------------------------------------------- + +CSV="build/sizes.csv" + +if [ ! -f "${CSV}" ]; then + echo "ERROR: ${CSV} not found after Docker run." >&2 + exit 1 +fi + +# Sort by full size ascending (skip header) +SORTED=$(tail -n +2 "${CSV}" | sort -t',' -k4,4n) + +if [ -z "${SORTED}" ]; then + echo "ERROR: No data rows in ${CSV}" >&2 + exit 1 +fi + +# Format bytes as X.X KB using integer math (no bc dependency) +fmt_kb() { + local val="$1" + if [[ "${val}" =~ ^[0-9]+$ ]]; then + local whole=$((val / 1024)) + local frac=$(( (val % 1024) * 10 / 1024 )) + echo "${whole}.${frac} KB" + else + echo "${val}" + fi +} + +# --- Console summary --- +echo "" +echo "============================================================" +echo "FR_math.c code size (.text bytes), compiled with -Os" +echo "============================================================" +echo "" +printf " %-28s %8s %8s %8s\n" "Target" "Lean" "Core" "Full" +printf " %-28s %8s %8s %8s\n" "----------------------------" "--------" "--------" "--------" +while IFS=',' read -r target lean core full; do + printf " %-28s %8s %8s %8s\n" "$target" "$lean" "$core" "$full" +done <<< "${SORTED}" +echo "" +echo "Lean = -DFR_LEAN -DFR_NO_PRINT (radian trig, inv trig, log/exp, sqrt)" +echo "Core = -DFR_CORE_ONLY (+ degree trig, BAM tan, log10, hypot)" +echo "Full = all features (+ print, waves, 
ADSR)" +echo "" + +# --- build/sizes.md --- +{ + echo "# FR_math.c Code Sizes (.text bytes, -Os)" + echo "" + echo "| Target | Lean | Core | Full |" + echo "|--------|-----:|-----:|-----:|" + while IFS=',' read -r target lean core full; do + printf "| %s | %s | %s | %s |\n" "$target" "$(fmt_kb "$lean")" "$(fmt_kb "$core")" "$(fmt_kb "$full")" + done <<< "${SORTED}" + echo "" + echo "Lean = \`-DFR_LEAN -DFR_NO_PRINT\` | Core = \`-DFR_CORE_ONLY\` | Full = all features" +} > build/sizes.md + +echo "Wrote build/sizes.csv and build/sizes.md" + +if [ "${MODE}" != "update" ]; then + exit 0 +fi + +# ----------------------------------------------------------------------- +# 4. Patch doc files +# ----------------------------------------------------------------------- + +# Build markdown replacement block +MD_ROWS="" +while IFS=',' read -r target lean core full; do + row="| ${target} | $(fmt_kb "${lean}") | $(fmt_kb "${core}") | $(fmt_kb "${full}") |" + if [ -n "${MD_ROWS}" ]; then + MD_ROWS+=$'\n' + fi + MD_ROWS+="${row}" +done <<< "${SORTED}" + +MD_TABLE=""$'\n' +MD_TABLE+="| Target | Lean | Core | Full |"$'\n' +MD_TABLE+="|--------|-----:|-----:|-----:|"$'\n' +MD_TABLE+="${MD_ROWS}"$'\n' +MD_TABLE+="" + +# Patch a markdown file between sentinels +patch_markdown() { + local file="$1" + if [ ! -f "$file" ]; then + echo " skip: $file not found" >&2 + return + fi + + perl -0777 -i -pe " + s{.*?} + {${MD_TABLE}}s + " "$file" + + echo " patched: $file" +} + +# Patch HTML file between sentinels +patch_html() { + local file="$1" + if [ ! -f "$file" ]; then + echo " skip: $file not found" >&2 + return + fi + + # Build HTML rows + local html_rows="" + while IFS=',' read -r target lean core full; do + local tr="" + if [ -n "$html_rows" ]; then + html_rows+=$'\n' + fi + html_rows+="${tr}" + done <<< "${SORTED}" + + local replacement + replacement=""$'\n' + replacement+="
    ${target}$(fmt_kb "${lean}")$(fmt_kb "${core}")$(fmt_kb "${full}")
    "$'\n' + replacement+=""$'\n' + replacement+=""$'\n' + replacement+="${html_rows}"$'\n' + replacement+=""$'\n' + replacement+="
    TargetLeanCoreFull
    "$'\n' + replacement+="" + + perl -0777 -i -pe " + s{.*?} + {${replacement}}s + " "$file" + + echo " patched: $file" +} + +echo "" +echo "Patching doc files..." +patch_markdown "README.md" +patch_markdown "docs/building.md" +patch_html "pages/guide/building.html" +echo "Done." diff --git a/scripts/size_report.sh b/scripts/size_report.sh deleted file mode 100755 index 69c875f..0000000 --- a/scripts/size_report.sh +++ /dev/null @@ -1,142 +0,0 @@ -#!/bin/bash -# Enhanced size report for FR_Math library -# Builds for multiple architectures and displays a formatted table - -set -e - -# Colors for output -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' # No Color - -# Source and build directories -SRC_DIR="src" -BUILD_DIR="build" -TEMP_DIR="build/size_report" - -# Create temp directory for builds -mkdir -p "$TEMP_DIR" - -# Function to build and get size for an architecture -build_and_size() { - local arch=$1 - local compiler=$2 - local flags=$3 - local output_file="$TEMP_DIR/FR_math_${arch}.o" - - if command -v $compiler >/dev/null 2>&1; then - # Try to compile - if $compiler $flags -Isrc -Wall -Os -c $SRC_DIR/FR_math.c -o "$output_file" 2>/dev/null; then - # Get size in bytes - local size=$(wc -c < "$output_file" 2>/dev/null || echo "0") - echo "$size" - else - echo "fail" - fi - else - echo "n/a" - fi -} - -# Function to format number with commas -format_number() { - printf "%'d" $1 2>/dev/null || echo $1 -} - -echo "" -echo "=========================================" -echo " FR_Math Multi-Architecture Size Report" -echo "=========================================" -echo "" -echo "Building for all available architectures..." 
-echo "" - -# Build for each architecture -x86_32_size=$(build_and_size "x86-32" "gcc" "-m32") -x86_64_size=$(build_and_size "x86-64" "gcc" "-m64") -arm32_size=$(build_and_size "arm32" "arm-linux-gnueabihf-gcc" "") -arm64_size=$(build_and_size "arm64" "aarch64-linux-gnu-gcc" "") -# Bare-metal Cortex-M (Thumb) targets — toolchain is arm-none-eabi-gcc. -# Cortex-M0 = Thumb-1 (very dense, no DSP), Cortex-M4 = Thumb-2 (DSP, MAC). -cm0_size=$(build_and_size "cortex-m0" "arm-none-eabi-gcc" "-mcpu=cortex-m0 -mthumb --specs=nosys.specs") -cm4_size=$(build_and_size "cortex-m4" "arm-none-eabi-gcc" "-mcpu=cortex-m4 -mthumb --specs=nosys.specs") -m68k_size=$(build_and_size "m68k" "m68k-elf-gcc" "") -# RISC-V: try the bare-metal newlib toolchain first, fall back to elf names. -riscv32_size=$(build_and_size "riscv32" "riscv64-unknown-elf-gcc" "-march=rv32imc -mabi=ilp32") -if [ "$riscv32_size" = "n/a" ]; then - riscv32_size=$(build_and_size "riscv32" "riscv32-unknown-elf-gcc" "") -fi -riscv64_size=$(build_and_size "riscv64" "riscv64-unknown-elf-gcc" "-march=rv64imac -mabi=lp64") - -# Native build -native_arch=$(uname -m) -native_size=$(build_and_size "native" "gcc" "") - -# Print formatted table -printf "┌──────────────┬──────────────┬──────────┐\n" -printf "│ Architecture │ Compiler │ Size │\n" -printf "├──────────────┼──────────────┼──────────┤\n" - -# Function to print a row -print_row() { - local arch=$1 - local compiler=$2 - local size=$3 - - if [ "$size" = "n/a" ]; then - printf "│ %-12s │ %-12s │ %8s │\n" "$arch" "not found" " -" - elif [ "$size" = "fail" ]; then - printf "│ %-12s │ %-12s │ %8s │\n" "$arch" "error" " -" - elif [ "$size" = "0" ]; then - printf "│ %-12s │ %-12s │ %8s │\n" "$arch" "$compiler" " -" - else - printf "│ %-12s │ %-12s │ %'8d │\n" "$arch" "$compiler" "$size" - fi -} - -# Print each architecture -print_row "x86-32" "gcc -m32" "$x86_32_size" -print_row "x86-64" "gcc -m64" "$x86_64_size" -print_row "ARM32" "arm-gcc" "$arm32_size" -print_row "ARM64" 
"aarch64-gcc" "$arm64_size" -print_row "Cortex-M0" "arm-eabi-gcc" "$cm0_size" -print_row "Cortex-M4" "arm-eabi-gcc" "$cm4_size" -print_row "68k" "m68k-gcc" "$m68k_size" -print_row "RISC-V 32" "riscv32-gcc" "$riscv32_size" -print_row "RISC-V 64" "riscv64-gcc" "$riscv64_size" -printf "├──────────────┼──────────────┼──────────┤\n" -print_row "Native($native_arch)" "gcc" "$native_size" -printf "└──────────────┴──────────────┴──────────┘\n" - -# Optimization comparison for native -if [ "$native_size" != "n/a" ] && [ "$native_size" != "fail" ]; then - echo "" - echo "Optimization Comparison (Native $native_arch):" - echo "────────────────────────────────────────" - - os_size=$(gcc -Isrc -Wall -Os -c $SRC_DIR/FR_math.c -o "$TEMP_DIR/FR_math_Os.o" 2>/dev/null && wc -c < "$TEMP_DIR/FR_math_Os.o" || echo "0") - o2_size=$(gcc -Isrc -Wall -O2 -c $SRC_DIR/FR_math.c -o "$TEMP_DIR/FR_math_O2.o" 2>/dev/null && wc -c < "$TEMP_DIR/FR_math_O2.o" || echo "0") - o3_size=$(gcc -Isrc -Wall -O3 -c $SRC_DIR/FR_math.c -o "$TEMP_DIR/FR_math_O3.o" 2>/dev/null && wc -c < "$TEMP_DIR/FR_math_O3.o" || echo "0") - o0_size=$(gcc -Isrc -Wall -O0 -c $SRC_DIR/FR_math.c -o "$TEMP_DIR/FR_math_O0.o" 2>/dev/null && wc -c < "$TEMP_DIR/FR_math_O0.o" || echo "0") - - printf " -O0 (none): %'8d bytes\n" $o0_size - printf " -Os (size): %'8d bytes\n" $os_size - printf " -O2 (speed): %'8d bytes\n" $o2_size - printf " -O3 (max): %'8d bytes\n" $o3_size -fi - -echo "" -echo "Note: Install cross-compilers for more architectures:" -echo " Ubuntu/Debian:" -echo " sudo apt-get install gcc-multilib g++-multilib" -echo " sudo apt-get install gcc-arm-linux-gnueabihf" -echo " sudo apt-get install gcc-aarch64-linux-gnu" -echo " sudo apt-get install gcc-arm-none-eabi # Cortex-M (Thumb)" -echo " sudo apt-get install gcc-riscv64-unknown-elf" -echo " sudo apt-get install gcc-m68k-linux-gnu" -echo "" -echo " macOS (via brew):" -echo " brew install --cask gcc-arm-embedded # Cortex-M (Thumb)" -echo " brew install arm-gnu-toolchain" 
-echo " brew install riscv-gnu-toolchain" -echo "" \ No newline at end of file diff --git a/scripts/update_sizes.sh b/scripts/update_sizes.sh deleted file mode 100755 index b696edd..0000000 --- a/scripts/update_sizes.sh +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env bash -# -# update_sizes.sh — read build/sizes.csv and patch the size table into -# README.md, docs/building.md, and pages/guide/building.html. -# -# Usage: -# scripts/update_sizes.sh # print table to stdout -# scripts/update_sizes.sh --update # also patch the three doc files -# -# The table is delimited by sentinel comments: -# -# ... -# -# -# Exit status: 0 on success, non-zero on missing CSV or extraction failure. - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" -cd "${PROJECT_ROOT}" - -CSV="build/sizes.csv" -MODE="print" - -for arg in "$@"; do - case "$arg" in - --update) MODE="update" ;; - -h|--help) - echo "Usage: scripts/update_sizes.sh [--update]" - echo " (no args) Read build/sizes.csv, print size table" - echo " --update Also patch README.md, docs/building.md, pages/guide/building.html" - exit 0 - ;; - *) echo "Unknown option: $arg" >&2; exit 1 ;; - esac -done - -if [ ! -f "${CSV}" ]; then - echo "ERROR: ${CSV} not found. Run docker/build_sizes.sh first." >&2 - exit 1 -fi - -# ----------------------------------------------------------------------- -# 1. 
Read CSV and sort by width then full_bytes ascending -# ----------------------------------------------------------------------- - -# Skip header, sort numerically by field 2 (width) then field 4 (full_bytes) -SORTED=$(tail -n +2 "${CSV}" | sort -t',' -k2,2n -k4,4n) - -if [ -z "${SORTED}" ]; then - echo "ERROR: No data rows in ${CSV}" >&2 - exit 1 -fi - -# Build markdown data rows -MD_ROWS="" -while IFS=',' read -r target width core full; do - # Format bytes as X.X KB - fmt_kb() { - local val="$1" - if [[ "${val}" =~ ^[0-9]+$ ]]; then - awk "BEGIN { printf \"%.1f KB\", ${val}/1024.0 }" - else - echo "${val}" - fi - } - row="| ${target} | $(fmt_kb "${core}") | $(fmt_kb "${full}") |" - if [ -n "${MD_ROWS}" ]; then - MD_ROWS+=$'\n' - fi - MD_ROWS+="${row}" -done <<< "${SORTED}" - -# Build full markdown table -MD_TABLE=""$'\n' -MD_TABLE+="| Target | Core | Full |"$'\n' -MD_TABLE+="|--------|-----:|-----:|"$'\n' -MD_TABLE+="${MD_ROWS}"$'\n' -MD_TABLE+="" - -echo "${MD_TABLE}" - -if [ "${MODE}" != "update" ]; then - exit 0 -fi - -# ----------------------------------------------------------------------- -# 2. Patch markdown files -# ----------------------------------------------------------------------- -patch_markdown() { - local file="$1" - if [ ! -f "$file" ]; then - echo " skip: $file not found" >&2 - return - fi - - perl -0777 -i -pe " - s{.*?} - {${MD_TABLE}}s - " "$file" - - echo " patched: $file" >&2 -} - -patch_markdown "README.md" -patch_markdown "docs/building.md" - -# ----------------------------------------------------------------------- -# 3. Patch HTML file (pages/guide/building.html) -# ----------------------------------------------------------------------- -patch_html() { - local file="$1" - if [ ! 
-f "$file" ]; then - echo " skip: $file not found" >&2 - return - fi - - # Convert sorted CSV rows to HTML rows - local html_rows="" - while IFS=',' read -r target width core full; do - fmt_kb() { - local val="$1" - if [[ "${val}" =~ ^[0-9]+$ ]]; then - awk "BEGIN { printf \"%.1f KB\", ${val}/1024.0 }" - else - echo "${val}" - fi - } - local tr="${target}$(fmt_kb "${core}")$(fmt_kb "${full}")" - if [ -n "$html_rows" ]; then - html_rows+=$'\n' - fi - html_rows+="${tr}" - done <<< "${SORTED}" - - # Build the replacement block - local replacement - replacement=""$'\n' - replacement+=""$'\n' - replacement+=""$'\n' - replacement+=""$'\n' - replacement+="${html_rows}"$'\n' - replacement+=""$'\n' - replacement+="
    TargetCoreFull
    "$'\n' - replacement+="" - - perl -0777 -i -pe " - s{.*?} - {${replacement}}s - " "$file" - - echo " patched: $file" >&2 -} - -patch_html "pages/guide/building.html" - -echo "Size table updated in all doc files." >&2 diff --git a/src/FR_math.c b/src/FR_math.c index 45c75c2..95809f8 100644 --- a/src/FR_math.c +++ b/src/FR_math.c @@ -30,12 +30,68 @@ */ #include "FR_math.h" -#include "FR_trig_table.h" #ifndef FR_NO_STDINT #include #endif +/*======================================================= + * Trig lookup tables (inlined — no separate header needed) + * + * Sine quadrant table: 129 entries covering [0, pi/2] in u0.15 format. + * Tangent octant table: 65 entries covering [0, pi/4] in u0.15 format. + * Generated by tools/coef-gen.py — do not hand-edit. + */ + +#define FR_TRIG_TABLE_BITS (7) +#define FR_TRIG_TABLE_SIZE ((1 << FR_TRIG_TABLE_BITS) + 1) + +#define FR_TRIG_FRAC_BITS (14 - FR_TRIG_TABLE_BITS) +#define FR_TRIG_FRAC_MAX (1 << FR_TRIG_FRAC_BITS) +#define FR_TRIG_FRAC_MASK (FR_TRIG_FRAC_MAX - 1) +#define FR_TRIG_FRAC_HALF (FR_TRIG_FRAC_MAX >> 1) +#define FR_TRIG_QUADRANT (1 << 14) + +static const unsigned short gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE] = { + 0, 402, 804, 1206, 1608, 2009, 2411, 2811, + 3212, 3612, 4011, 4410, 4808, 5205, 5602, 5998, + 6393, 6787, 7180, 7571, 7962, 8351, 8740, 9127, + 9512, 9896, 10279, 10660, 11039, 11417, 11793, 12167, + 12540, 12910, 13279, 13646, 14010, 14373, 14733, 15091, + 15447, 15800, 16151, 16500, 16846, 17190, 17531, 17869, + 18205, 18538, 18868, 19195, 19520, 19841, 20160, 20475, + 20788, 21097, 21403, 21706, 22006, 22302, 22595, 22884, + 23170, 23453, 23732, 24008, 24279, 24548, 24812, 25073, + 25330, 25583, 25833, 26078, 26320, 26557, 26791, 27020, + 27246, 27467, 27684, 27897, 28106, 28311, 28511, 28707, + 28899, 29086, 29269, 29448, 29622, 29792, 29957, 30118, + 30274, 30425, 30572, 30715, 30853, 30986, 31114, 31238, + 31357, 31471, 31581, 31686, 31786, 31881, 31972, 32058, + 32138, 32214, 32286, 32352, 32413, 
32470, 32522, 32568, + 32610, 32647, 32679, 32706, 32729, 32746, 32758, 32766, + 32768 +}; + +#define FR_TAN_TABLE_BITS (6) +#define FR_TAN_TABLE_SIZE ((1 << FR_TAN_TABLE_BITS) + 1) +#define FR_TAN_FRAC_BITS (13 - FR_TAN_TABLE_BITS) +#define FR_TAN_FRAC_MAX (1 << FR_TAN_FRAC_BITS) +#define FR_TAN_FRAC_MASK (FR_TAN_FRAC_MAX - 1) +#define FR_TAN_FRAC_HALF (FR_TAN_FRAC_MAX >> 1) +#define FR_TAN_OCTANT (1 << 13) + +static const unsigned short gFR_TAN_TAB_O[FR_TAN_TABLE_SIZE] = { + 0, 402, 804, 1207, 1610, 2013, 2417, 2822, + 3227, 3634, 4042, 4450, 4861, 5272, 5686, 6101, + 6518, 6937, 7358, 7782, 8208, 8637, 9068, 9503, + 9940, 10381, 10825, 11273, 11725, 12180, 12640, 13104, + 13573, 14046, 14525, 15009, 15498, 15993, 16494, 17001, + 17515, 18035, 18563, 19098, 19640, 20191, 20750, 21318, + 21895, 22481, 23078, 23685, 24302, 24931, 25572, 26226, + 26892, 27572, 28266, 28975, 29699, 30440, 31198, 31973, + 32768 +}; + /*======================================================= * Full-precision radian/degree → BAM conversion helpers * @@ -57,6 +113,7 @@ static s32 rad_to_bam_full(s32 r) return (r<<13)-(r<<1)+(r<<11)-(r>>3)+(r<<7)+(r<<6)+(r>>1)+(r>>8)-(r>>11)-(r>>14); } +#ifndef FR_LEAN /* deg_to_bam_full(d) — same idea for degrees. * Input d must already be normalized to radix 16 and reduced to [-90, 90). * Returns full s32 BAM (upper 16 = integer BAM, lower 16 = sub-BAM). @@ -65,6 +122,7 @@ static s32 deg_to_bam_full(s32 d) { return (d<<7)+(d<<6)-(d<<3)-(d<<1)+(d>>5)+(d>>6)-(d>>9); } +#endif /* Normalize a fixed-radix value to radix 16. */ static s32 normalize_to_r16(s32 val, u16 radix) @@ -74,8 +132,7 @@ static s32 normalize_to_r16(s32 val, u16 radix) : val; } -/* Reduce non-negative radian (at r16) to [0, 2*pi). - * Helper used by range_reduce_rad and the near-pi small-angle paths. */ +/* Reduce non-negative radian (at r16) to [0, 2*pi). 
*/ static s32 reduce_to_2pi(s32 r) { const s32 two_pi = FR_TWO_PI(16); /* 411775 */ @@ -86,32 +143,45 @@ static s32 reduce_to_2pi(s32 r) return r; } -/* Range-reduce radian value (at r16, non-negative) to [-pi, pi]. - * Caller guarantees r >= 0 (sign is handled externally). */ -static s32 range_reduce_rad(s32 r) + +/* rad_r16_to_bam — convert radian (at r16) in [0, 2π) to u16 BAM. + * Uses quadrant decomposition to keep rad_to_bam_full in its safe + * [-π/2, π/2) range, mirroring the approach in fr_deg_to_bam. */ +static u16 rad_r16_to_bam(s32 r) { - r = reduce_to_2pi(r); - if (r > FR_PI(16)) - r -= FR_TWO_PI(16); - return r; + const s32 half_pi = FR_HALF_PI(16); /* 102944 */ + const s32 three_half_pi = FR_THREE_HALF_PI(16); /* 308831 */ + const s32 pi = FR_PI(16); /* 205887 */ + const s32 two_pi = FR_TWO_PI(16); /* 411775 */ + + u16 offset = 0; + if (r >= half_pi && r < three_half_pi) { + r -= pi; + offset = 0x8000u; + } else if (r >= three_half_pi) { + r -= two_pi; + /* r is now in [-π/2, 0), no offset needed (u16 wraps naturally) */ + } + return (u16)(offset + (u16)((rad_to_bam_full(r) + (1 << 15)) >> 16)); } +/* (rad_r16_to_bam32 removed — sub-BAM interpolation approach abandoned) */ + /* fr_rad_to_bam — overflow-safe radian to u16 BAM conversion. - * Normalizes to r16, reduces via positive-only path, applies shift-only multiply. - * Handles inputs beyond ±2*pi with modulus (slow path). */ + * Normalizes to r16, reduces to [0, 2π), uses quadrant decomposition. */ u16 fr_rad_to_bam(s32 rad, u16 radix) { s32 r = normalize_to_r16(rad, radix); - /* BAM wraps naturally in u16, but range_reduce expects non-negative. - * For negative r: bam(-x) = -bam(x) mod 65536, so negate and let u16 wrap. 
*/ - s32 sign = 1; - if (r < 0) { r = -r; sign = -1; } - r = range_reduce_rad(r); - s32 bam_full = rad_to_bam_full(r); - if (sign < 0) bam_full = -bam_full; - return (u16)((bam_full + (1 << 15)) >> 16); + /* Normalize to [0, 2π) */ + if (r < 0) { + r += ((-r) / FR_TWO_PI(16)) * FR_TWO_PI(16); + if (r < 0) r += FR_TWO_PI(16); + } + r = reduce_to_2pi(r); + return rad_r16_to_bam(r); } +#ifndef FR_LEAN /* fr_deg_to_bam — overflow-safe degree to u16 BAM conversion. * Normalizes to r16, reduces to [-90, 90) with quadrant offset. */ u16 fr_deg_to_bam(s32 deg, u16 radix) @@ -133,6 +203,7 @@ u16 fr_deg_to_bam(s32 deg, u16 radix) return (u16)(offset + (u16)((deg_to_bam_full(d) + (1 << 15)) >> 16)); } +#endif /*======================================================= * BAM-native trig: fr_sin_bam, fr_cos_bam, fr_cos, fr_sin, fr_tan @@ -241,6 +312,7 @@ s32 fr_sin(s32 rad, u16 radix) return (sign < 0) ? -v : v; } +#ifndef FR_LEAN /*======================================================= * BAM-native tangent: fr_tan_bam * @@ -327,84 +399,142 @@ s32 fr_tan_bam(u16 bam) return (sign < 0) ? -raw : raw; } +#endif /* FR_LEAN */ -/* fr_tan — radian-input tangent with full sub-BAM precision. - * - * Goes directly to the 65-entry octant tangent table with 16-bit - * interpolation precision. Sign from quadrant, magnitude from table. - * No s64 intermediates. One 32-bit division in the second-octant path. +/* fr_tan — radian-input tangent. * - * Architecture: - * 1. Sign: determined by quadrant of the BAM position (Q1/Q3=+, Q2/Q4=-) - * 2. Magnitude: from octant table lookup + reciprocal identity - * - First octant [0,45°): direct table lerp - * - Second octant [45°,90°): 1/tan(90°-x) via reciprocal - * 3. Return sign * magnitude */ - -/* Internal: given a full s32 BAM, compute |tan| directly from the table. - * Returns the unsigned magnitude (always >= 0). 
*/ -static s32 tan_mag_from_bam_full(s32 bam_full) + * Normalize to [0, 2π], extract quadrant sign, convert rad→u16 BAM, + * then do direct octant table lookup + interpolation inline. + * Small-angle bypass at zero crossings: tan(x) ≈ x. + * Near poles: use radian distance directly (cot(δ) ≈ 1/δ) to avoid + * BAM quantization error amplified by the reciprocal. */ +s32 fr_tan(s32 rad, u16 radix) { - u16 bam0 = (u16)(bam_full >> 16); - u32 frac_sub = (u32)bam_full & 0xFFFFu; + if (rad == 0) return 0; + s32 r = normalize_to_r16(rad, radix); - u32 q = ((u32)bam0 >> 14) & 0x3u; - u32 inq = (u32)bam0 & 0x3FFFu; + /* tan(-x) = -tan(x): extract sign, work with |r| */ + s32 sign = 1; + if (r < 0) { r = -r; sign = -1; } + r = reduce_to_2pi(r); - /* Exact zeros: tan(0°) = tan(180°) = 0 */ - if (inq == 0 && frac_sub == 0 && (q == 0 || q == 2)) - return 0; + /* Small-angle bypass at zero crossings: tan(δ) ≈ δ */ + if (r < 256) + return (sign < 0) ? -r : r; + { + s32 delta = r - FR_PI(16); + if (delta >= -256 && delta <= 256) + return (sign < 0) ? -delta : delta; + } + { + s32 delta = FR_TWO_PI(16) - r; + if (delta >= 0 && delta < 256) + return (sign < 0) ? delta : -delta; + } - /* Exact poles: tan(90°) = tan(270°) → saturate */ - if (inq == 0 && frac_sub == 0 && (q == 1 || q == 3)) - return FR_TRIG_MAXVAL; + /* Near-pole bypass: within POLE_THRESH r16 of π/2 or 3π/2, + * use cot(δ) ≈ 1/δ from the radian distance directly. + * Compute δ at r24 using precise pole constants (8× less rounding + * error than the r16 FR_HALF_PI/FR_THREE_HALF_PI constants). + * At δ=2048 r16 (1.79°), 1/δ error is ~0.03%. 
*/ + { + const s32 pole_thresh = 2048; /* r16 units (~1.79°) */ + /* Precise pole positions at r24: + * π/2 × 2^24 = 26353589.76 → 26353590 + * 3π/2 × 2^24 = 79060769.28 → 79060769 */ + const s32 half_pi_r24 = 26353590; + const s32 three_half_pi_r24 = 79060769; + + s32 d1 = r - FR_HALF_PI(16); /* coarse check at r16 */ + s32 d2 = r - FR_THREE_HALF_PI(16); + s32 pole_delta_r24 = 0; + + if (d1 >= -pole_thresh && d1 <= pole_thresh) { + s32 r24 = r << 8; + s32 dd = r24 - half_pi_r24; + pole_delta_r24 = (dd < 0) ? -dd : dd; + } else if (d2 >= -pole_thresh && d2 <= pole_thresh) { + s32 r24 = r << 8; + s32 dd = r24 - three_half_pi_r24; + pole_delta_r24 = (dd < 0) ? -dd : dd; + } - /* Mirror odd quadrants (Q1, Q3) into the [0, 90°) range. - * After this, full_pos represents distance from the nearest zero. */ - u32 full_pos; - if (q == 1 || q == 3) - full_pos = ((u32)(0x4000u - inq) << 16) - frac_sub; - else - full_pos = ((u32)inq << 16) + frac_sub; + if (pole_delta_r24 > 0) { + /* Determine sign from radian quadrant */ + s32 pole_sign; + if (r < FR_HALF_PI(16)) + pole_sign = 1; /* before π/2: → +∞ */ + else if (r < FR_PI(16)) + pole_sign = -1; /* past π/2: → -∞ */ + else if (r <= FR_THREE_HALF_PI(16)) + pole_sign = 1; /* before 3π/2: → +∞ */ + else + pole_sign = -1; /* past 3π/2: → -∞ */ + + s32 raw; + if (pole_delta_r24 < 512) { + raw = FR_TRIG_MAXVAL; /* δ < 2 at r16 → saturate */ + } else { + /* cot(δ) ≈ 1/δ. In s15.16: (2^40) / δ_r24 */ + raw = (s32)((1ULL << 40) / (u32)pole_delta_r24); + if (raw > FR_TRIG_MAXVAL) raw = FR_TRIG_MAXVAL; + } + s32 v = (pole_sign < 0) ? -raw : raw; + return (sign < 0) ? 
-v : v; + } + } + + /* Convert radian to u16 BAM */ + u16 bam = rad_r16_to_bam(r); + + /* Decompose BAM into quadrant + in-quadrant */ + u32 q = ((u32)bam >> 14) & 0x3; + u32 inq = (u32)bam & 0x3FFFu; + s32 tsign = 1; /* tan sign from quadrant */ + + /* Exact zeros: bam lands on 0° or 180° */ + if (inq == 0 && (q == 0 || q == 2)) + return 0; + + /* Q1/Q3: reflect and negate */ + if (q == 1 || q == 3) { + inq = 0x4000u - inq; + tsign = -1; + } - /* Split at octant boundary (45° = 8192 BAM = 8192*65536 sub-BAM) */ + /* Octant table lookup + interpolation (same logic as fr_tan_bam) */ + u32 idx, frac; s32 raw; - if (full_pos < ((u32)FR_TAN_OCTANT << 16)) { - /* First octant [0, 45°): direct table lookup. - * 64 table intervals, each 2^23 sub-BAM units wide. */ - u32 idx = full_pos >> 23; - u32 frac16 = (full_pos >> 7) & 0xFFFFu; + if (inq < FR_TAN_OCTANT) { + /* First octant [0°, 45°): direct lookup */ + idx = inq >> FR_TAN_FRAC_BITS; + frac = inq & FR_TAN_FRAC_MASK; s32 lo = (s32)gFR_TAN_TAB_O[idx]; s32 hi = (s32)gFR_TAN_TAB_O[idx + 1]; - raw = lo + (s32)(((s32)(hi - lo) * (s32)frac16 + (1 << 15)) >> 16); + raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); if (raw < 0x40) { - /* Near zero: redo with 4 extra bits of precision */ - s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; - s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; - raw = lo4 + (s32)(((s32)(hi4 - lo4) * (s32)frac16 + (1 << 15)) >> 16); - raw = (raw + 4) >> 3; /* u0.19 → s15.16 with rounding */ + s32 lo4 = lo << 4; + s32 hi4 = hi << 4; + raw = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + raw = (raw + 4) >> 3; } else { - raw <<= 1; /* u0.15 → s15.16 */ + raw <<= 1; } } else { - /* Second octant [45°, 90°): tan(x) = 1 / tan(90° - x). - * Complement = distance from pole, in first-octant range. 
*/ - u32 comp = ((u32)FR_TRIG_QUADRANT << 16) - full_pos; - - u32 idx = comp >> 23; - u32 frac16 = (comp >> 7) & 0xFFFFu; - + /* Second octant [45°, 90°): reciprocal identity */ + u32 comp = 0x4000u - inq; + idx = comp >> FR_TAN_FRAC_BITS; + frac = comp & FR_TAN_FRAC_MASK; s32 lo = (s32)gFR_TAN_TAB_O[idx]; s32 hi = (s32)gFR_TAN_TAB_O[idx + 1]; - raw = lo + (s32)(((s32)(hi - lo) * (s32)frac16 + (1 << 15)) >> 16); + raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); if (raw < 0x40) { - /* Near pole: redo with 4 extra bits then reciprocal */ - s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; - s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; - s32 raw_hp = lo4 + (s32)(((s32)(hi4 - lo4) * (s32)frac16 + (1 << 15)) >> 16); + s32 lo4 = lo << 4; + s32 hi4 = hi << 4; + s32 raw_hp = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); if (raw_hp < 32) raw = FR_TRIG_MAXVAL; else @@ -413,35 +543,13 @@ static s32 tan_mag_from_bam_full(s32 bam_full) raw = (s32)(0x80000000u / (u32)raw); } } - return raw; -} - -s32 fr_tan(s32 rad, u16 radix) -{ - if (rad == 0) return 0; - /* tan(-x) = -tan(x): factor out sign, reduce positive */ - s32 r = normalize_to_r16(rad, radix); - s32 tan_sign = 1; - if (r < 0) { r = -r; tan_sign = -1; } - r = reduce_to_2pi(r); - /* Near-π small angle: tan(π + δ) = tan(δ) ≈ δ. */ - s32 delta = r - FR_PI(16); - if (delta >= -256 && delta <= 256) { - return (tan_sign < 0) ? -delta : delta; - } - /* Full pipeline */ - if (r > FR_PI(16)) - r -= FR_TWO_PI(16); - s32 bam_full = rad_to_bam_full(r); - /* Sign from quadrant of the BAM position */ - u32 q = ((u32)((u16)(bam_full >> 16)) >> 14) & 0x3u; - s32 sign = (q == 1 || q == 3) ? -tan_sign : tan_sign; - - s32 mag = tan_mag_from_bam_full(bam_full); - return (sign < 0) ? -mag : mag; + /* Combine quadrant sign and input sign */ + s32 v = (tsign < 0) ? -raw : raw; + return (sign < 0) ? 
-v : v; } +#ifndef FR_LEAN /*======================================================= * Degree-input trig: convert to u16 BAM via fr_deg_to_bam, then * call the BAM-native functions. Cardinal angles are exact. @@ -507,41 +615,61 @@ s32 FR_TanI(s32 deg) return fr_tan_bam(FR_DEG2BAM_I(deg)); } -/* Internal: range-reduce degrees and produce full s32 BAM (used by fr_tan_deg). */ -static s32 range_reduce_deg_bam_full(s32 deg, u16 radix) -{ - s32 d = normalize_to_r16(deg, radix); - if (d >= FR_D360_R16) { - s32 n = d / FR_D360_R16; - d -= n * FR_D360_R16; - } - if (d >= FR_D180_R16) d -= FR_D360_R16; - s32 offset = 0; - if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = (s32)0x80000000u; } - else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = (s32)0x80000000u; } - return offset + deg_to_bam_full(d); -} - s32 fr_tan_deg(s32 deg, u16 radix) { if (radix == 0) return FR_TanI(deg); - /* tan(-x) = -tan(x): factor out sign, reduce positive */ - s32 tan_sign = 1; - if (deg < 0) { deg = -deg; tan_sign = -1; } - /* Exact cardinal angles: tan is exactly 0 or ±MAXVAL */ + s32 deg_orig = deg; + /* Normalize to [0, 360°) at caller radix */ + s32 d360 = 360 << radix; + if (deg < 0) { + deg += ((-deg) / d360) * d360; + if (deg < 0) deg += d360; + } + if (deg >= d360) { + deg -= (deg / d360) * d360; + } + /* Exact cardinal angles */ s32 frac_mask = (1 << radix) - 1; if ((deg & frac_mask) == 0) { - s32 deg_int = deg >> radix; - s32 rem = deg_int % 180; - if (rem == 0) return 0; - if (rem == 90) return tan_sign > 0 ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + s32 ideg = deg >> radix; + if (ideg == 0 || ideg == 180) return 0; + if (ideg == 90 || ideg == 270) + return (deg_orig >= 0) ? 
FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + } + /* Near 0° or 180° (tan=0 crossings): tan(δ) ≈ δ in radians */ + s32 d = normalize_to_r16(deg, radix); + { + const s32 DEG_THRESH = 14000; /* ~0.21° at r16 */ + s32 delta; + /* Near 0° */ + if (d < DEG_THRESH) { + s32 up = d << 8; + return (FR_DEG2RAD(up) + (1 << 7)) >> 8; + } + /* Near 180° */ + delta = d - FR_D180_R16; + if (delta >= -DEG_THRESH && delta <= DEG_THRESH) { + s32 up = delta << 8; + return (FR_DEG2RAD(up) + (1 << 7)) >> 8; + } + /* Near 360° */ + delta = FR_D360_R16 - d; + if (delta >= 0 && delta < DEG_THRESH) { + s32 up = delta << 8; + return -((FR_DEG2RAD(up) + (1 << 7)) >> 8); + } } - s32 bam_full = range_reduce_deg_bam_full(deg, radix); - u32 q = ((u32)((u16)(bam_full >> 16)) >> 14) & 0x3u; - s32 sign = (q == 1 || q == 3) ? -tan_sign : tan_sign; - s32 mag = tan_mag_from_bam_full(bam_full); - return (sign < 0) ? -mag : mag; + /* Main path: convert to u16 BAM, table lookup */ + u16 bam = fr_deg_to_bam(deg, radix); + s32 v = fr_tan_bam(bam); + /* Near-pole BAM alias: determine sign from normalized angle position */ + if (bam == 0x4000u || bam == 0xC000u) { + s32 pole_d = (bam == 0x4000u) ? FR_D90_R16 : (FR_D90_R16 + FR_D180_R16); + v = (d < pole_d) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + } + return v; } +#endif /* FR_LEAN */ /*======================================================= * FR_FixMuls (x*y signed, NOT saturated, round-to-nearest) @@ -992,11 +1120,13 @@ s32 FR_ln(s32 input, u16 radix, u16 output_radix) return FR_MULK28(r, FR_krLOG2E_28); } +#ifndef FR_LEAN s32 FR_log10(s32 input, u16 radix, u16 output_radix) { s32 r = FR_log2(input, radix, output_radix); return FR_MULK28(r, FR_krLOG2_10_28); } +#endif #ifndef FR_NO_PRINT /*************************************** @@ -1350,6 +1480,7 @@ s32 FR_sqrt(s32 input, u16 radix) * * Side effects: none. Pure function. 
*/ +#ifndef FR_LEAN s32 FR_hypot(s32 x, s32 y, u16 radix) { uint64_t xx = (uint64_t)((int64_t)x * (int64_t)x); @@ -1357,6 +1488,7 @@ s32 FR_hypot(s32 x, s32 y, u16 radix) (void)radix; /* the 2*radix in xx+yy cancels with isqrt's halving */ return (s32)fr_isqrt64(xx + yy); } +#endif /*======================================================= * FR_hypot_fast8 — 8-segment piecewise-linear magnitude approximation. diff --git a/src/FR_math.h b/src/FR_math.h index ca4b096..6991d4f 100644 --- a/src/FR_math.h +++ b/src/FR_math.h @@ -32,14 +32,18 @@ #ifndef __FR_Math_h__ #define __FR_Math_h__ -#define FR_MATH_VERSION "2.0.7" -#define FR_MATH_VERSION_HEX 0x020007 /* major << 16 | minor << 8 | patch */ +#define FR_MATH_VERSION "2.0.8" +#define FR_MATH_VERSION_HEX 0x020008 /* major << 16 | minor << 8 | patch */ #ifdef FR_CORE_ONLY #define FR_NO_PRINT #define FR_NO_WAVES #endif +#ifdef FR_LEAN +#define FR_NO_WAVES +#endif + #ifdef __cplusplus extern "C" { @@ -373,7 +377,9 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { #define FR_D360_R16 ((s32)360 << 16) u16 fr_rad_to_bam(s32 rad, u16 radix); +#ifndef FR_LEAN u16 fr_deg_to_bam(s32 deg, u16 radix); +#endif /* FR_BAM2RAD(x): multiply by 2*pi/65536 ≈ 0.0000959 (5 terms, ~18 bits) */ #define FR_BAM2RAD(x) (((x)>>13)-((x)>>15)+((x)>>18)+((x)>>21)+((x)>>25)) @@ -424,20 +430,16 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { * Worst-case error: ~2 LSB in s15.16 (~3e-5 absolute), except at the four * cardinal angles where the result is exact. * - * FR_USE_EXTENDED_TRIG_PREC (default: ON) enables sub-BAM interpolation - * in fr_sin/fr_cos/fr_tan (the radian/degree-input functions). This adds - * one extra multiply per call but recovers ~16 bits of sub-BAM precision. - * To disable (faster, no multiply in the trig hot path): - * - * #define FR_USE_EXTENDED_TRIG_PREC 0 - * #include "FR_math.h" + * The radian and degree wrappers (fr_sin, fr_cos, fr_tan, etc.) 
range-reduce + * their input, convert to u16 BAM, and call the BAM-native functions. Small- + * angle bypasses at the zero crossings (sin≈0, cos≈0, tan≈0) use the linear + * approximation sin(δ)≈δ to avoid BAM quantization error where it matters most. */ -#ifndef FR_USE_EXTENDED_TRIG_PREC -#define FR_USE_EXTENDED_TRIG_PREC 1 -#endif s32 fr_cos_bam(u16 bam); s32 fr_sin_bam(u16 bam); +#ifndef FR_LEAN s32 fr_tan_bam(u16 bam); +#endif s32 fr_cos(s32 rad, u16 radix); s32 fr_sin(s32 rad, u16 radix); s32 fr_tan(s32 rad, u16 radix); @@ -449,6 +451,7 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { /* #define fr_cos_deg(deg) fr_cos_bam(FR_DEG2BAM_I(deg)) — removed, name reused for 2-arg function */ /* #define fr_sin_deg(deg) fr_sin_bam(FR_DEG2BAM_I(deg)) — removed, name reused for 2-arg function */ +#ifndef FR_LEAN /*=============================================== * Degree-input trig API * @@ -471,6 +474,7 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { #define FR_Sin fr_sin_deg #define FR_Cos fr_cos_deg #define FR_Tan fr_tan_deg +#endif /* FR_LEAN */ /* Inverse trig — output in radians at caller-specified radix (s32). * FR_atan2 returns radians at radix 16 (s15.16). @@ -487,7 +491,9 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { s32 FR_log2(s32 input, u16 radix, u16 output_radix); s32 FR_ln(s32 input, u16 radix, u16 output_radix); +#ifndef FR_LEAN s32 FR_log10(s32 input, u16 radix, u16 output_radix); +#endif /* Power */ s32 FR_pow2(s32 input, u16 radix); @@ -535,7 +541,9 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { * can check `result == FR_DOMAIN_ERROR` to detect domain errors. */ s32 FR_sqrt(s32 input, u16 radix); +#ifndef FR_LEAN s32 FR_hypot(s32 x, s32 y, u16 radix); +#endif /* Fast approximate magnitude — shift-only, no multiply, no 64-bit. * Based on piecewise-linear approximation of sqrt(x*x + y*y). 
diff --git a/src/FR_math_2D.cpp b/src/FR_math_2D.cpp index c9025b3..b45ca75 100644 --- a/src/FR_math_2D.cpp +++ b/src/FR_math_2D.cpp @@ -5,7 +5,7 @@ * * @copy Copyright (C) <2001-2026> * @author M A Chatterjee - * @version 2.0.7 M. A. Chatterjee, cleaned up naming + * @version 2.0.8 M. A. Chatterjee, cleaned up naming * * This file contains integer math settable fixed point radix math routines for * use on systems in which floating point is not desired or unavailable. diff --git a/src/FR_math_2D.h b/src/FR_math_2D.h index 8f16330..3eaf7d3 100644 --- a/src/FR_math_2D.h +++ b/src/FR_math_2D.h @@ -3,7 +3,7 @@ * * @copy Copyright (C) <2001-2026> * @author M A Chatterjee - * @version 2.0.7 M. A. Chatterjee, cleaned up naming + * @version 2.0.8 M. A. Chatterjee, cleaned up naming * * This file contains integer math settable fixed point radix math routines for * use on systems in which floating point is not desired or unavailable. diff --git a/src/FR_tan32.c b/src/FR_tan32.c deleted file mode 100644 index 1f8fdec..0000000 --- a/src/FR_tan32.c +++ /dev/null @@ -1,282 +0,0 @@ -/** - * @file FR_tan32.c - division-free tangent and binary-search atan2 - * - * fr_tan_bam32: hybrid tangent — table lookup + sin/cos near pole. - * 0-45°: direct u32 lerp from gFR_TAN_TAB_Q[0..64]. - * 45-75°: variable-radix u16 mantissa + shift tables (no division). - * 75-90°: sin/cos ratio from cosine table (one s64 division). - * - * fr_tan_bam32_d64: full-range sin/cos ratio from cosine table. - * Kept for comparison. One s64 division per call. - * - * fr_atan2_32: binary search on the 129-entry u32 tan quadrant table - * (gFR_TAN_TAB_Q), then quadrant mapping. - * - * @copy Copyright (C) <2001-2026> - * @author M A Chatterjee - * - */ - -#include "FR_math.h" -#include "FR_trig_table.h" -#include "FR_tan_table.h" - -#ifndef FR_NO_STDINT -#include -#endif - -/*======================================================= - * cos_lerp_full — interpolated cosine from the 129-entry quadrant table. 
- * - * Returns cos(inq) in high-precision fixed-point (7 extra frac bits). - * Used internally by fr_tan_bam32 for the 75°-90° sin/cos path and - * by fr_tan_bam32_d64 for the full-range sin/cos path. - */ -static s32 cos_lerp_full(u32 inq) -{ - u32 idx = inq >> FR_TRIG_FRAC_BITS; - u32 frac = inq & FR_TRIG_FRAC_MASK; - s32 lo = gFR_COS_TAB_Q[idx]; - s32 d = lo - gFR_COS_TAB_Q[idx + 1]; - return (lo << FR_TRIG_FRAC_BITS) - d * (s32)frac; -} - -/*======================================================= - * fr_tan_bam32 — hybrid tangent: table lookup + sin/cos near pole. - * - * Three zones: - * 0°-45°: direct u32 lerp from gFR_TAN_TAB_Q[0..64]. - * 7-bit index + 7-bit frac. All u32, no division. - * - * 45°-75°: variable-radix u16 mantissa + u8 shift tables - * (gFR_TAN_MANT_Q2 / gFR_TAN_SHIFT_Q2). - * All u32, no division. - * - * 75°-90°: sin/cos ratio via the 129-entry cosine table. - * One s64 division. Handles the pole accurately. - * - * Poles: ±FR_TRIG_MAXVAL (90° = +, 270° = -). - * Result: s32 at radix 16 (s15.16). - */ -#define FR_TAN_OCT_HALF (1 << 13) /* 8192 = 45 deg in BAM quadrant */ -#define FR_TAN_D64_THRESH ((u32)(75.0 / 90.0 * 16384 + 0.5)) /* 13653 */ - -s32 fr_tan_bam32(u16 bam) -{ - u32 q = ((u32)bam >> 14) & 0x3; - u32 inq = (u32)bam & 0x3FFFu; - s32 sign = (q & 1) ? -1 : 1; - - /* Poles: exactly 90° or 270° */ - if (inq == 0 && (q & 1)) - return (q == 1) ? 
FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; - - if (q & 1) - inq = 0x4000u - inq; - - u32 raw; - - if (inq <= FR_TAN_OCT_HALF) { - /* First octant (0°-45°): direct u32 table lookup */ - u32 idx = inq >> FR_TAN32_FRAC_BITS; - u32 frac = inq & FR_TAN32_FRAC_MASK; - u32 lo = gFR_TAN_TAB_Q[idx]; - u32 delta = gFR_TAN_TAB_Q[idx + 1] - lo; - raw = lo + ((delta * frac) >> FR_TAN32_FRAC_BITS); - } else if (inq < FR_TAN_D64_THRESH) { - /* Second octant 45°-75°: variable-radix u16+shift */ - u32 oct2 = inq - FR_TAN_OCT_HALF; - u32 idx = oct2 >> FR_TAN32_FRAC_BITS; - u32 frac = oct2 & FR_TAN32_FRAC_MASK; - - u32 m_lo = gFR_TAN_MANT_Q2[idx]; - u32 m_hi = gFR_TAN_MANT_Q2[idx + 1]; - u32 s_lo = gFR_TAN_SHIFT_Q2[idx]; - u32 s_hi = gFR_TAN_SHIFT_Q2[idx + 1]; - u32 s_max = (s_hi > s_lo) ? s_hi : s_lo; - - u32 a_lo = m_lo >> (s_max - s_lo); - u32 a_hi = m_hi >> (s_max - s_hi); - u32 delta = a_hi - a_lo; - - raw = (a_lo + ((delta * frac) >> FR_TAN32_FRAC_BITS)) << s_max; - } else { - /* 75°-90°: sin/cos ratio from cosine table (one s64 division) */ - s32 cos_val = cos_lerp_full(inq); - s32 sin_val = cos_lerp_full(FR_TAN32_QUADRANT - inq); - if (cos_val == 0) - raw = (u32)FR_TRIG_MAXVAL; - else - raw = (u32)((((s64)sin_val << 16) + ((s64)cos_val >> 1)) / (s64)cos_val); - } - - return (sign < 0) ? -(s32)raw : (s32)raw; -} - -/*======================================================= - * fr_tan_bam32_d64 — tangent via sin/cos from the cosine table. - * - * Full-range sin/cos implementation kept for comparison. - * Computes sin(x)/cos(x) using the 129-entry cosine quadrant table. - * One s64 division per call. - */ -s32 fr_tan_bam32_d64(u16 bam) -{ - u32 q = ((u32)bam >> 14) & 0x3; - u32 inq = (u32)bam & 0x3FFFu; - s32 sign = 1; - s32 sin_val, cos_val; - s32 raw; - - if (inq == 0 && (q == 0 || q == 2)) - return 0; - if (inq == 0 && (q == 1 || q == 3)) - return (q == 1) ? 
FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; - - if (q == 1 || q == 3) { - inq = 0x4000u - inq; - sign = -1; - } - - cos_val = cos_lerp_full(inq); - sin_val = cos_lerp_full(FR_TAN32_QUADRANT - inq); - - if (cos_val == 0) - raw = FR_TRIG_MAXVAL; - else { - raw = (s32)((((s64)sin_val << 16) + ((s64)cos_val >> 1)) / (s64)cos_val); - } - - return (sign < 0) ? -raw : raw; -} - -/* fr_tan32: tan from radians at caller-specified radix. s15.16 result. */ -s32 fr_tan32(s32 rad, u16 radix) -{ - return fr_tan_bam32(fr_rad_to_bam(rad, radix)); -} - -/* fr_tan_deg32: tan from degrees at caller-specified radix. s15.16 result. - * radix 0 = integer degrees, radix > 0 = fixed-point degrees with that - * many fractional bits. s32 input so e.g. radix=16 gives s15.16 degrees. */ -s32 fr_tan_deg32(s32 deg, u16 radix) -{ - u16 bam = (radix == 0) ? FR_DEG2BAM_I((s16)deg) - : fr_deg_to_bam(deg, radix); - return fr_tan_bam32(bam); -} - -/*======================================================= - * fr_atan_bam32 - Arctangent via binary search on the tan table. - * - * Input: positive ratio in s15.16 (caller handles signs/quadrants). - * Output: BAM angle (u16) in [0, 0x4000) representing [0, 90 deg). - * - * Algorithm: - * 1. If x <= 0: return 0. - * 2. If x >= table[127]: return near-pole BAM (saturate ~89.3 deg). - * 3. Binary search: 7 iterations on 128 entries to bracket. - * 4. Linear interpolation within bracket for 7 fractional bits. - * 5. Assemble: bam = (idx << 7) | frac. - */ -static u16 fr_atan_bam32(s32 x) -{ - s32 lo, hi, mid; - s32 idx, d, num, frac; - u32 ux; - - if (x <= 0) - return 0; - - ux = (u32)x; - - /* Saturate near the pole */ - if (ux >= gFR_TAN_TAB_Q[127]) - return (u16)((127u << FR_TAN32_FRAC_BITS) + FR_TAN32_FRAC_MASK); - - /* Binary search: find lo such that table[lo] <= ux < table[lo+1]. - * The table is monotonically increasing. 
*/ - lo = 0; - hi = 127; - while (lo < hi) { - mid = (lo + hi + 1) >> 1; - if (gFR_TAN_TAB_Q[mid] <= ux) - lo = mid; - else - hi = mid - 1; - } - - /* lo is now the index where table[lo] <= ux < table[lo+1]. */ - idx = lo; - - /* Linear interpolation within the bracket */ - d = (s32)(gFR_TAN_TAB_Q[idx + 1] - gFR_TAN_TAB_Q[idx]); - num = (s32)(ux - gFR_TAN_TAB_Q[idx]); - if (d > 0) - frac = (s32)(((s64)num << FR_TAN32_FRAC_BITS) / d); - else - frac = 0; - - if (frac > FR_TAN32_FRAC_MASK) - frac = FR_TAN32_FRAC_MASK; - - return (u16)(((u32)idx << FR_TAN32_FRAC_BITS) + (u32)frac); -} - -/*======================================================= - * fr_atan2_32 - Full-circle atan2 using the tan table binary search. - * - * Input: y, x as s32 values at radix 16 (s15.16). - * Output: radians at out_radix. - * Range: [-pi, pi]. Returns 0 for atan2(0, 0). - * - * Algorithm: - * 1. Handle axis cases. - * 2. Compute ratio = |y| / |x| or |x| / |y| (whichever <= 1.0) in s15.16. - * 3. Binary search -> BAM angle in [0, pi/4]. - * 4. If |y| > |x|: angle = pi/2 - angle. - * 5. Apply quadrant from signs of x and y. - */ -s32 fr_atan2_32(s32 y, s32 x, u16 out_radix) -{ - s32 ax, ay, ratio; - u16 bam; - s32 angle; - s32 pi, half_pi; - - pi = FR_CHRDX(FR_kPI, FR_kPREC, out_radix); - half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); - - /* Axis cases */ - if (x == 0) { - if (y > 0) return half_pi; - if (y < 0) return -half_pi; - return 0; - } - if (y == 0) - return (x > 0) ? 0 : pi; - - ax = (x < 0) ? -x : x; - ay = (y < 0) ? -y : y; - - /* Compute ratio in s15.16. Use the smaller/larger to stay in [0, 1.0] - * for the initial lookup, then complement if needed. 
*/ - if (ay <= ax) { - /* angle in [0, 45 deg]: ratio = ay/ax */ - ratio = (s32)(((s64)ay << 16) / ax); - bam = fr_atan_bam32(ratio); - /* Convert BAM to radians at out_radix */ - angle = FR_CHRDX(FR_Q2RAD(bam), 14, out_radix); - } else { - /* angle in (45, 90 deg): ratio = ax/ay, angle = pi/2 - atan(ratio) */ - ratio = (s32)(((s64)ax << 16) / ay); - bam = fr_atan_bam32(ratio); - angle = half_pi - FR_CHRDX(FR_Q2RAD(bam), 14, out_radix); - } - - /* Apply quadrant from signs of x and y */ - if (x > 0) - return (y > 0) ? angle : -angle; - else - return (y > 0) ? (pi - angle) : (angle - pi); -} diff --git a/src/FR_tan_table.h b/src/FR_tan_table.h deleted file mode 100644 index bdee54c..0000000 --- a/src/FR_tan_table.h +++ /dev/null @@ -1,115 +0,0 @@ -/** - * @file FR_tan_table.h - tangent quadrant tables (u32, s15.16) - * - * Master table: gFR_TAN_TAB_Q[129] - * 129 entries covering [0, pi/2] in s15.16 fixed-point. - * table[i] = round(tan(i * pi/2 / 128) * 65536), i=0..127 - * table[128] = 0x7FFFFFFF (pole saturation) - * 7-bit index + 7-bit lerp from 14-bit in-quadrant BAM. - * - * Used by: - * fr_tan_bam32(): entries 0-64 directly (first octant, 0°-45°) - * fr_atan_bam32(): all 129 entries for binary-search arctangent - * - * Second-octant variable-radix tables (derived from entries 64-128): - * gFR_TAN_MANT_Q2[65]: u16 mantissa (top 16 bits) - * gFR_TAN_SHIFT_Q2[65]: u8 shift (bits to left-shift) - * Used by fr_tan_bam32() for division-free 45°-90° path. - * - * Total ROM: 129×4 + 65×2 + 65×1 = 711 bytes - * - * @copy Copyright (C) <2001-2026> - * @author M A Chatterjee - * - * Same zlib license as the rest of the library. 
- */ -#ifndef __FR_TAN_TABLE_H__ -#define __FR_TAN_TABLE_H__ - -#ifdef __cplusplus -extern "C" { -#endif - -#ifndef __FR_Platform_Defs_H__ -#include "FR_defs.h" -#endif - -/* ── 129-entry table (used by atan binary search) ───────── */ -#define FR_TAN32_TABLE_BITS (7) -#define FR_TAN32_TABLE_SIZE ((1 << FR_TAN32_TABLE_BITS) + 1) /* 129 */ -#define FR_TAN32_FRAC_BITS (14 - FR_TAN32_TABLE_BITS) /* 7 */ -#define FR_TAN32_FRAC_MAX (1 << FR_TAN32_FRAC_BITS) /* 128 */ -#define FR_TAN32_FRAC_MASK (FR_TAN32_FRAC_MAX - 1) /* 0x7F */ -#define FR_TAN32_FRAC_HALF (FR_TAN32_FRAC_MAX >> 1) /* 64 */ -#define FR_TAN32_QUADRANT (1 << 14) /* 16384 */ - -static const u32 gFR_TAN_TAB_Q[FR_TAN32_TABLE_SIZE] = { - 0, 804, 1609, 2414, - 3220, 4026, 4834, 5644, - 6455, 7268, 8083, 8901, - 9721, 10545, 11372, 12202, - 13036, 13874, 14717, 15564, - 16416, 17273, 18136, 19005, - 19880, 20762, 21650, 22546, - 23449, 24360, 25280, 26208, - 27146, 28093, 29050, 30018, - 30996, 31986, 32988, 34002, - 35030, 36071, 37126, 38196, - 39281, 40382, 41500, 42636, - 43790, 44963, 46156, 47369, - 48605, 49863, 51145, 52451, - 53784, 55144, 56532, 57950, - 59398, 60880, 62395, 63947, - 65536, 67165, 68835, 70548, - 72308, 74116, 75974, 77887, - 79856, 81885, 83977, 86135, - 88365, 90670, 93054, 95523, - 98082, 100736, 103493, 106358, - 109340, 112447, 115687, 119071, - 122609, 126314, 130198, 134276, - 138564, 143081, 147847, 152884, - 158218, 163878, 169896, 176309, - 183161, 190499, 198380, 206870, - 216043, 225990, 236817, 248648, - 261634, 275959, 291845, 309568, - 329472, 351993, 377693, 407305, - 441808, 482534, 531352, 590958, - 665398, 761030, 888450, 1066730, - 1334016, 1779314, 2669641, 5340086, - 2147483647 -}; - -/* ── Second-octant variable-radix tables (used by forward tan) ── */ - -/* Mantissa table: top 16 bits of gFR_TAN_TAB_Q[64..128]. - * gFR_TAN_MANT_Q2[i] = gFR_TAN_TAB_Q[64+i] >> gFR_TAN_SHIFT_Q2[i] - * 65 entries × 2 bytes = 130 bytes ROM. 
- */ -static const u16 gFR_TAN_MANT_Q2[65] = { - 32768, 33582, 34417, 35274, 36154, 37058, 37987, 38943, - 39928, 40942, 41988, 43067, 44182, 45335, 46527, 47761, - 49041, 50368, 51746, 53179, 54670, 56223, 57843, 59535, - 61304, 63157, 65099, 33569, 34641, 35770, 36961, 38221, - 39554, 40969, 42474, 44077, 45790, 47624, 49595, 51717, - 54010, 56497, 59204, 62162, 65408, 34494, 36480, 38696, - 41184, 43999, 47211, 50913, 55226, 60316, 33209, 36934, - 41587, 47564, 55528, 33335, 41688, 55603, 41713, 41719, - 65535 -}; - -/* Shift table: bits to left-shift mantissa to reconstruct s15.16 value. - * 65 entries × 1 byte = 65 bytes ROM. - */ -static const u8 gFR_TAN_SHIFT_Q2[65] = { - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 6, 7, - 15 -}; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif /* __FR_TAN_TABLE_H__ */ diff --git a/src/FR_trig_table.h b/src/FR_trig_table.h deleted file mode 100644 index f57edd6..0000000 --- a/src/FR_trig_table.h +++ /dev/null @@ -1,106 +0,0 @@ -/** - * @file FR_trig_table.h - 129-entry quadrant sine table for FR_Math 2.0 - * - * This table covers one quadrant [0, pi/2] inclusive in 128 intervals (so - * 129 entries). Indexed by a 7-bit BAM (binary angular measure) sub-index. - * Used by fr_sin_bam / fr_cos_bam in FR_math.c. - * - * Output format: u0.15 (unsigned, 15 fractional bits). So - * gFR_SIN_TAB_Q[0] = round(sin(0) * 32768) = 0 - * gFR_SIN_TAB_Q[64] = round(sin(pi/4) * 32768) = 23170 - * gFR_SIN_TAB_Q[128] = round(sin(pi/2) * 32768) = 32768 - * - * Generated by tools/coef-gen.py — do not hand-edit. - * - * @copy Copyright (C) <2001-2026> - * @author M A Chatterjee - * - * Same zlib license as the rest of the library. 
- */ -#ifndef __FR_TRIG_TABLE_H__ -#define __FR_TRIG_TABLE_H__ - -#ifdef __cplusplus -extern "C" { -#endif - -#define FR_TRIG_TABLE_BITS (7) /* log2(intervals) */ -#define FR_TRIG_TABLE_SIZE ((1 << FR_TRIG_TABLE_BITS) + 1) /* entries = intervals + 1 */ - -/* Derived constants for fr_sin_bam / fr_cos_bam. - * - * The BAM has 16 bits total: 2 top bits for quadrant, 14 bits in-quadrant. - * The in-quadrant value is split into (FR_TRIG_TABLE_BITS) table-index bits - * and (FR_TRIG_FRAC_BITS) interpolation-fraction bits, so - * FR_TRIG_TABLE_BITS + FR_TRIG_FRAC_BITS = 14. - * - * Changing FR_TRIG_TABLE_BITS (and regenerating the table with coef-gen.py) - * is the single knob for ROM-vs-precision trade-off. Every other constant - * below derives from it automatically. - */ -#define FR_TRIG_FRAC_BITS (14 - FR_TRIG_TABLE_BITS) -#define FR_TRIG_FRAC_MAX (1 << FR_TRIG_FRAC_BITS) -#define FR_TRIG_FRAC_MASK (FR_TRIG_FRAC_MAX - 1) -#define FR_TRIG_FRAC_HALF (FR_TRIG_FRAC_MAX >> 1) /* rounding bias */ -#define FR_TRIG_QUADRANT (1 << 14) /* in-quadrant span */ - -static const unsigned short gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE] = { - 0, 402, 804, 1206, 1608, 2009, 2411, 2811, - 3212, 3612, 4011, 4410, 4808, 5205, 5602, 5998, - 6393, 6787, 7180, 7571, 7962, 8351, 8740, 9127, - 9512, 9896, 10279, 10660, 11039, 11417, 11793, 12167, - 12540, 12910, 13279, 13646, 14010, 14373, 14733, 15091, - 15447, 15800, 16151, 16500, 16846, 17190, 17531, 17869, - 18205, 18538, 18868, 19195, 19520, 19841, 20160, 20475, - 20788, 21097, 21403, 21706, 22006, 22302, 22595, 22884, - 23170, 23453, 23732, 24008, 24279, 24548, 24812, 25073, - 25330, 25583, 25833, 26078, 26320, 26557, 26791, 27020, - 27246, 27467, 27684, 27897, 28106, 28311, 28511, 28707, - 28899, 29086, 29269, 29448, 29622, 29792, 29957, 30118, - 30274, 30425, 30572, 30715, 30853, 30986, 31114, 31238, - 31357, 31471, 31581, 31686, 31786, 31881, 31972, 32058, - 32138, 32214, 32286, 32352, 32413, 32470, 32522, 32568, - 32610, 32647, 32679, 32706, 
32729, 32746, 32758, 32766, - 32768 -}; - -/* ---- Tangent table: 65 entries covering one octant [0, pi/4] ---- - * - * gFR_TAN_TAB_O[i] = round(tan(i * pi/4 / 64) * 32768) for i = 0..64 - * - * Output format: u0.15 stored as u16 (unsigned, 15 fractional bits). - * Entry[64] = 32768 (tan 45° = 1.0 exactly) requires u16; it does not - * fit in s16. - * - * The table is used by fr_tan_bam() in FR_math.c. The first-octant - * lookup gives a raw result in u0.15 which is then shifted to s15.16. - * Second-octant (>45°) uses the reciprocal identity: - * tan(pi/2 - x) = 1/tan(x) - * - * 130 bytes ROM. - */ -#define FR_TAN_TABLE_BITS (6) -#define FR_TAN_TABLE_SIZE ((1 << FR_TAN_TABLE_BITS) + 1) /* 65 */ -#define FR_TAN_FRAC_BITS (13 - FR_TAN_TABLE_BITS) /* 7 */ -#define FR_TAN_FRAC_MAX (1 << FR_TAN_FRAC_BITS) /* 128 */ -#define FR_TAN_FRAC_MASK (FR_TAN_FRAC_MAX - 1) /* 0x7F */ -#define FR_TAN_FRAC_HALF (FR_TAN_FRAC_MAX >> 1) /* 64 */ -#define FR_TAN_OCTANT (1 << 13) /* 8192 */ - -static const unsigned short gFR_TAN_TAB_O[FR_TAN_TABLE_SIZE] = { - 0, 402, 804, 1207, 1610, 2013, 2417, 2822, - 3227, 3634, 4042, 4450, 4861, 5272, 5686, 6101, - 6518, 6937, 7358, 7782, 8208, 8637, 9068, 9503, - 9940, 10381, 10825, 11273, 11725, 12180, 12640, 13104, - 13573, 14046, 14525, 15009, 15498, 15993, 16494, 17001, - 17515, 18035, 18563, 19098, 19640, 20191, 20750, 21318, - 21895, 22481, 23078, 23685, 24302, 24931, 25572, 26226, - 26892, 27572, 28266, 28975, 29699, 30440, 31198, 31973, - 32768 -}; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif /* __FR_TRIG_TABLE_H__ */ diff --git a/tests/test_full_sweep.c b/tests/test_full_sweep.c deleted file mode 100644 index 64d7365..0000000 --- a/tests/test_full_sweep.c +++ /dev/null @@ -1,346 +0,0 @@ -/** - * test_full_sweep.c — exhaustive error sweep for cos and tan (old & new) - * - * Three independent sweeps, each in its native input domain: - * BAM: all 65536 u16 values (0..65535) - * Radian: every s15.16 LSB from -2pi to +2pi (~823k 
values) - * Degree: fr_tan_deg32(s32,16) at s15.16, 1/1024 deg steps, ±360 deg (~738k) - * FR_Tan(s16,6) at s9.6 for old (s16 limits range) - * FR_TanI(deg) tested at integer-degree-aligned subset - * - * Error metrics: - * cos: % of full scale (1.0). |comp/65536 - ref| * 100 - * tan: relative % when |ref| >= 0.01, else absolute % of 1.0 - * Skipped when |ref| > 1000 (near-pole, unrepresentable in s15.16) - * - * Also reports average ns/call for each function. - */ - -#include -#include -#include -#include -#include "FR_math.h" -#include "FR_trig_table.h" -#include "FR_tan_table.h" - -/* FR_tan32.c functions */ -extern s32 fr_tan_bam32(u16 bam); -extern s32 fr_tan32(s32 rad, u16 radix); -extern s32 fr_tan_deg32(s32 deg, u16 radix); - -/* ── sweep accumulator ─────────────────────────────── */ - -typedef struct { - const char *name; - double peak_err; - double ref_at_peak; - s32 val_at_peak; - double sum_err; - long count; - char peak_label[64]; - double total_ns; - long time_count; -} sweep_t; - -static void sw_init(sweep_t *s, const char *name) -{ - memset(s, 0, sizeof(*s)); - s->name = name; -} - -static void sw_cos(sweep_t *s, double ref, s32 comp, const char *label) -{ - double comp_dbl = (double)comp / 65536.0; - double pct = fabs(comp_dbl - ref) * 100.0; - s->sum_err += pct; - s->count++; - if (pct > s->peak_err) { - s->peak_err = pct; - s->ref_at_peak = ref; - s->val_at_peak = comp; - strncpy(s->peak_label, label, sizeof(s->peak_label) - 1); - } -} - -#define TAN_CLIP 1000.0 -#define TAN_ZERO 0.01 - -static void sw_tan(sweep_t *s, double ref, s32 comp, const char *label) -{ - if (fabs(ref) > TAN_CLIP) return; - double comp_dbl = (double)comp / 65536.0; - double abs_err = fabs(comp_dbl - ref); - double pct = (fabs(ref) >= TAN_ZERO) - ? 
(abs_err / fabs(ref)) * 100.0 - : abs_err * 100.0; - s->sum_err += pct; - s->count++; - if (pct > s->peak_err) { - s->peak_err = pct; - s->ref_at_peak = ref; - s->val_at_peak = comp; - strncpy(s->peak_label, label, sizeof(s->peak_label) - 1); - } -} - -static double now_ns(void) -{ - struct timespec ts; - clock_gettime(CLOCK_MONOTONIC, &ts); - return (double)ts.tv_sec * 1e9 + (double)ts.tv_nsec; -} - -static void sw_header(void) -{ - printf("| %-26s | %10s | %10s | %7s | %-30s | %12s | %10s |\n", - "Function", "Peak Err", "Avg Err", "ns/call", - "Peak At", "Ref Value", "Got (s32)"); - printf("| %-26s | %10s | %10s | %7s | %-30s | %12s | %10s |\n", - "--------------------------", "----------", "----------", "-------", - "------------------------------", "------------", "----------"); -} - -static void sw_print(const sweep_t *s) -{ - double avg = (s->count > 0) ? s->sum_err / (double)s->count : 0.0; - double ns = (s->time_count > 0) ? s->total_ns / (double)s->time_count : 0.0; - printf("| %-26s | %9.4f%% | %9.5f%% | %5.1f | %-30s | %12.6f | %10d |\n", - s->name, s->peak_err, avg, ns, s->peak_label, - s->ref_at_peak, (int)s->val_at_peak); -} - -/* ════════════════════════════════════════════════════════ - * BAM sweep: all 65536 u16 values - * ════════════════════════════════════════════════════════ */ -static void sweep_bam(void) -{ - sweep_t cos_old, tan_old, tan_new; - sw_init(&cos_old, "fr_cos_bam"); - sw_init(&tan_old, "fr_tan_bam (old)"); - sw_init(&tan_new, "fr_tan_bam32 (new)"); - - for (long b = 0; b < 65536; b++) { - u16 bam = (u16)b; - double rad = (double)bam * 2.0 * M_PI / 65536.0; - char label[64]; - snprintf(label, sizeof(label), "BAM %5u (%7.2f deg)", - bam, (double)bam * 360.0 / 65536.0); - - sw_cos(&cos_old, cos(rad), fr_cos_bam(bam), label); - sw_tan(&tan_old, tan(rad), fr_tan_bam(bam), label); - sw_tan(&tan_new, tan(rad), fr_tan_bam32(bam), label); - } - - /* timing */ - { - volatile s32 sink = 0; - double t0, t1; - long N = 65536; - - t0 = 
now_ns(); - for (long b = 0; b < N; b++) sink += fr_cos_bam((u16)b); - t1 = now_ns(); - cos_old.total_ns = t1 - t0; cos_old.time_count = N; - - t0 = now_ns(); - for (long b = 0; b < N; b++) sink += fr_tan_bam((u16)b); - t1 = now_ns(); - tan_old.total_ns = t1 - t0; tan_old.time_count = N; - - t0 = now_ns(); - for (long b = 0; b < N; b++) sink += fr_tan_bam32((u16)b); - t1 = now_ns(); - tan_new.total_ns = t1 - t0; tan_new.time_count = N; - - (void)sink; - } - - printf("### BAM domain — all 65536 u16 values\n\n"); - sw_header(); - sw_print(&cos_old); - sw_print(&tan_old); - sw_print(&tan_new); - printf("\ntan samples: old=%ld, new=%ld (rest skipped near poles)\n\n", - tan_old.count, tan_new.count); -} - -/* ════════════════════════════════════════════════════════ - * Radian sweep: every s15.16 LSB from -2pi to +2pi - * ════════════════════════════════════════════════════════ */ -static void sweep_rad(void) -{ - sweep_t cos_old, tan_old, tan_new; - sw_init(&cos_old, "fr_cos (s15.16)"); - sw_init(&tan_old, "fr_tan (s15.16)"); - sw_init(&tan_new, "fr_tan32 (s15.16)"); - - s32 two_pi = (s32)(2.0 * M_PI * 65536.0 + 0.5); /* 411775 */ - long total = 0; - - for (s32 r = -two_pi; r <= two_pi; r++) { - double rad = (double)r / 65536.0; - char label[64]; - snprintf(label, sizeof(label), "r16=%d (%.4f rad)", r, rad); - - sw_cos(&cos_old, cos(rad), fr_cos(r, 16), label); - sw_tan(&tan_old, tan(rad), fr_tan(r, 16), label); - sw_tan(&tan_new, tan(rad), fr_tan32(r, 16), label); - total++; - } - - /* timing */ - { - volatile s32 sink = 0; - double t0, t1; - long N = 65536; - s32 step = (2 * two_pi) / N; - if (step < 1) step = 1; - - t0 = now_ns(); - for (s32 r = -two_pi; r <= two_pi; r += step) sink += fr_cos(r, 16); - t1 = now_ns(); - cos_old.total_ns = t1 - t0; cos_old.time_count = N; - - t0 = now_ns(); - for (s32 r = -two_pi; r <= two_pi; r += step) sink += fr_tan(r, 16); - t1 = now_ns(); - tan_old.total_ns = t1 - t0; tan_old.time_count = N; - - t0 = now_ns(); - for (s32 r = 
-two_pi; r <= two_pi; r += step) sink += fr_tan32(r, 16); - t1 = now_ns(); - tan_new.total_ns = t1 - t0; tan_new.time_count = N; - - (void)sink; - } - - printf("### Radian domain — every s15.16 LSB, -2pi..+2pi (%ld values)\n\n", total); - sw_header(); - sw_print(&cos_old); - sw_print(&tan_old); - sw_print(&tan_new); - printf("\ntan samples: old=%ld, new=%ld\n\n", tan_old.count, tan_new.count); -} - -/* ════════════════════════════════════════════════════════ - * Degree sweep: all 65536 s16 values at radix 6 (s9.6) - * s15.16 degrees: every LSB from -360*65536 to +360*65536 (~823k values) - * FR_Tan(deg,16) — old, s16 input limits to ±0.5 deg (too narrow!) - * fr_tan_deg32(deg,16) — new, s32 input, full s15.16 range - * FR_TanI(deg) — integer degrees (tested at integer-aligned subset) - * - * NOTE: FR_Tan still takes s16, so its s15.16 sweep only covers ±0.5 deg. - * To get a fair comparison we ALSO test FR_Tan at radix=6 (s9.6, ±512 deg). - * ════════════════════════════════════════════════════════ */ -static void sweep_deg(void) -{ - sweep_t cos_old, tan_old_s16, tan_new_full, tan_old_int; - sw_init(&cos_old, "FR_Cos (s9.6 deg)"); - sw_init(&tan_old_s16, "FR_Tan (s9.6 deg, s16)"); - sw_init(&tan_new_full, "fr_tan_deg32 (s15.16 deg)"); - sw_init(&tan_old_int, "FR_TanI (int deg)"); - - /* New path: s15.16 degrees, every LSB from -360 to +360. - * 360 * 65536 = 23592960. Total ~47M values — too many. - * Use same density as radian sweep: ~823k values. - * -360..+360 deg = 720 deg range. 823551 / 720 ≈ 1144 steps/deg. - * That's close to radix=10 (1024 steps/deg). Use radix=16 with - * step = 65536/1024 = 64 to get ~720k values. 
*/ - s32 deg360_s16 = 360L * 65536; - s32 step_new = 64; /* every 64th LSB of s15.16 = 1/1024 deg */ - long total_new = 0; - - for (s32 d = -deg360_s16; d <= deg360_s16; d += step_new) { - double deg_dbl = (double)d / 65536.0; - double rad = deg_dbl * M_PI / 180.0; - char label[64]; - snprintf(label, sizeof(label), "d16=%d (%.4f deg)", (int)d, deg_dbl); - - double rt = tan(rad); - sw_tan(&tan_new_full, rt, fr_tan_deg32(d, 16), label); - - /* FR_TanI at integer-degree subset */ - if (d % 65536 == 0) { - s16 ideg = (s16)(d / 65536); - char ilabel[64]; - snprintf(ilabel, sizeof(ilabel), "deg=%d", ideg); - sw_tan(&tan_old_int, rt, FR_TanI(ideg), ilabel); - } - - total_new++; - } - - /* Old path: FR_Tan takes s16, so use radix=6 (s9.6) to cover ±512 deg */ - long total_old = 0; - for (long d = -32768; d <= 32767; d++) { - s16 dval = (s16)d; - double deg_dbl = (double)d / 64.0; - double rad = deg_dbl * M_PI / 180.0; - char label[64]; - snprintf(label, sizeof(label), "d6=%d (%.3f deg)", (int)d, deg_dbl); - - sw_cos(&cos_old, cos(rad), FR_Cos(dval, 6), label); - sw_tan(&tan_old_s16, tan(rad), FR_Tan(dval, 6), label); - total_old++; - } - - /* timing */ - { - volatile s32 sink = 0; - double t0, t1; - long N = 65536; - - t0 = now_ns(); - for (long d = -32768; d <= 32767; d++) sink += FR_Cos((s16)d, 6); - t1 = now_ns(); - cos_old.total_ns = t1 - t0; cos_old.time_count = N; - - t0 = now_ns(); - for (long d = -32768; d <= 32767; d++) sink += FR_Tan((s16)d, 6); - t1 = now_ns(); - tan_old_s16.total_ns = t1 - t0; tan_old_s16.time_count = N; - - s32 tstep = (2 * deg360_s16) / N; - t0 = now_ns(); - for (s32 d = -deg360_s16; d <= deg360_s16; d += tstep) - sink += fr_tan_deg32(d, 16); - t1 = now_ns(); - tan_new_full.total_ns = t1 - t0; tan_new_full.time_count = N; - - t0 = now_ns(); - for (long i = -360; i < 360; i++) sink += FR_TanI((s16)i); - t1 = now_ns(); - tan_old_int.total_ns = t1 - t0; tan_old_int.time_count = 720; - - (void)sink; - } - - printf("### Degree domain\n\n"); - 
printf("fr_tan_deg32: s32 input, radix=16, every 1/1024 deg, ±360 deg (%ld values)\n", total_new); - printf("FR_Tan: s16 input, radix=6 (s9.6), all 65536 s16 values (%ld values)\n\n", total_old); - sw_header(); - sw_print(&cos_old); - sw_print(&tan_old_s16); - sw_print(&tan_new_full); - sw_print(&tan_old_int); - printf("\ntan samples: old_s16=%ld, new_s32=%ld, old_int=%ld\n\n", - tan_old_s16.count, tan_new_full.count, tan_old_int.count); -} - -/* ── main ──────────────────────────────────────────── */ - -int main(void) -{ - printf("FR_Math exhaustive error sweep: cos, tan (old), tan32 (new)\n"); - printf("============================================================\n"); - printf("cos: error = %% of full scale (1.0)\n"); - printf("tan: relative %% when |ref|>=0.01, absolute when near zero, skip |ref|>1000\n\n"); - - sweep_bam(); - sweep_rad(); - sweep_deg(); - - printf("Done.\n"); - return 0; -} diff --git a/tests/test_pole_table.c b/tests/test_pole_table.c deleted file mode 100644 index 02a2829..0000000 --- a/tests/test_pole_table.c +++ /dev/null @@ -1,92 +0,0 @@ -/** - * test_pole_table.c — dump values around both tan poles (90° and 270°) - * - * For ±20 entries around each pole, show: - * BAM index, degrees, ground truth, and each function's output + error - */ - -#include -#include -#include "FR_math.h" -#include "FR_trig_table.h" -#include "FR_tan_table.h" - -extern s32 fr_tan_bam32(u16 bam); -extern s32 fr_tan32(s32 rad, u16 radix); -extern s32 fr_tan_deg32(s32 deg, u16 radix); - -static double to_dbl(s32 v) { return (double)v / 65536.0; } - -static double err_pct(double ref, double got) -{ - double ae = fabs(got - ref); - if (fabs(ref) >= 0.01) - return (ae / fabs(ref)) * 100.0; - return ae * 100.0; /* absolute near zero */ -} - -static void dump_pole(u16 pole_bam, const char *name, int range) -{ - printf("\n### Pole at %s (BAM %u)\n\n", name, pole_bam); - printf("| %5s | %9s | %14s | %14s %7s | %14s %7s | %14s %7s | %14s %7s |\n", - "BAM", "deg", "ground 
truth", - "tan_bam OLD", "err%", - "tan_bam32 NEW", "err%", - "tan(rad) NEW", "err%", - "tan(deg) NEW", "err%"); - printf("| %5s | %9s | %14s | %14s %7s | %14s %7s | %14s %7s | %14s %7s |\n", - "-----", "---------", "--------------", - "--------------", "-------", - "--------------", "-------", - "--------------", "-------", - "--------------", "-------"); - - for (int i = -range; i <= range; i++) { - u16 bam = (u16)((int)pole_bam + i); - double rad_dbl = (double)bam * 2.0 * M_PI / 65536.0; - double deg_dbl = (double)bam * 360.0 / 65536.0; - double truth = tan(rad_dbl); - - /* BAM functions */ - double v_bam_old = to_dbl(fr_tan_bam(bam)); - double v_bam_new = to_dbl(fr_tan_bam32(bam)); - - /* Radian: convert BAM to s15.16 radian the same way the library does */ - s32 r16 = (s32)(rad_dbl * 65536.0 + (rad_dbl >= 0 ? 0.5 : -0.5)); - double v_rad_new = to_dbl(fr_tan32(r16, 16)); - - /* Degree: convert to s9.6 */ - s16 d6 = (s16)(int)(deg_dbl * 64.0 + (deg_dbl >= 0 ? 0.5 : -0.5)); - double v_deg_new = to_dbl(fr_tan_deg32(d6, 6)); - - /* Clip display for readability */ - if (fabs(truth) > 100000.0) { - printf("| %5u | %9.3f | %14s | %14s %7s | %14s %7s | %14s %7s | %14s %7s |\n", - bam, deg_dbl, ">>pole<<", - "---", "---", "---", "---", "---", "---", "---", "---"); - continue; - } - - printf("| %5u | %9.3f | %14.4f | %14.4f %6.2f%% | %14.4f %6.2f%% | %14.4f %6.2f%% | %14.4f %6.2f%% |\n", - bam, deg_dbl, truth, - v_bam_old, err_pct(truth, v_bam_old), - v_bam_new, err_pct(truth, v_bam_new), - v_rad_new, err_pct(truth, v_rad_new), - v_deg_new, err_pct(truth, v_deg_new)); - } -} - -int main(void) -{ - printf("FR_Math tan pole neighborhood dump\n"); - printf("==================================\n"); - printf("Values within ±20 BAM steps of each pole.\n"); - printf("Error: relative %% when |ref|>=0.01, absolute otherwise.\n"); - - /* 90° pole = BAM 16384, 270° pole = BAM 49152 */ - dump_pole(16384, "90 deg", 20); - dump_pole(49152, "270 deg", 20); - - printf("\nDone.\n"); - 
return 0; -} diff --git a/tests/test_sweep_csv.c b/tests/test_sweep_csv.c deleted file mode 100644 index 5b33cbc..0000000 --- a/tests/test_sweep_csv.c +++ /dev/null @@ -1,149 +0,0 @@ -/** - * test_sweep_csv.c — emit CSV + summary for all 65536 BAM values - * - * Compares 3 tan implementations: - * fr_tan_bam (old): 65-entry u16 octant table + reciprocal - * fr_tan_bam32_d64: sin/cos from 129-entry cos table, s64 div - * fr_tan_bam32 (new): direct 65-entry u32 tan table lookup, no div - * - * Ground truth clamped to ±SAT_MAX for fair pole comparison. - * - * Output: build/tan_sweep.csv - */ - -#include -#include -#include -#include "FR_math.h" -#include "FR_trig_table.h" -#include "FR_tan_table.h" - -extern s32 fr_tan_bam32(u16 bam); -extern s32 fr_tan_bam32_d64(u16 bam); - -#define SAT_MAX (32767.999984741211) - -static double to_dbl(s32 v) { return (double)v / 65536.0; } - -static double clamp(double v) -{ - if (v > SAT_MAX) return SAT_MAX; - if (v < -SAT_MAX) return -SAT_MAX; - return v; -} - -static double err_pct(double ref, double got) -{ - if (fabs(ref) >= SAT_MAX && fabs(got) >= SAT_MAX) - return 0.0; - double ae = fabs(got - ref); - if (fabs(ref) >= 0.01) - return (ae / fabs(ref)) * 100.0; - return ae * 100.0; -} - -static double now_ns(void) -{ - struct timespec ts; - clock_gettime(CLOCK_MONOTONIC, &ts); - return (double)ts.tv_sec * 1e9 + (double)ts.tv_nsec; -} - -int main(void) -{ - FILE *fp = fopen("build/tan_sweep.csv", "w"); - if (!fp) { perror("fopen"); return 1; } - - fprintf(fp, "bam,degrees,tan_truth," - "tan_old,tan_d64,tan_direct," - "err_old,err_d64,err_direct\n"); - - for (long b = 0; b < 65536; b++) { - u16 bam = (u16)b; - double deg = (double)bam * 360.0 / 65536.0; - double rad = (double)bam * 2.0 * M_PI / 65536.0; - double truth = clamp(tan(rad)); - - double v_old = to_dbl(fr_tan_bam(bam)); - double v_d64 = to_dbl(fr_tan_bam32_d64(bam)); - double v_direct = to_dbl(fr_tan_bam32(bam)); - - fprintf(fp, 
"%u,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f,%.6f\n", - bam, deg, truth, - v_old, v_d64, v_direct, - err_pct(truth, v_old), - err_pct(truth, v_d64), - err_pct(truth, v_direct)); - } - - fclose(fp); - - /* Timing */ - volatile s32 sink = 0; - double t0, t1; - long N = 65536; - - t0 = now_ns(); - for (long b = 0; b < N; b++) sink += fr_tan_bam((u16)b); - t1 = now_ns(); - double ns_old = (t1 - t0) / N; - - t0 = now_ns(); - for (long b = 0; b < N; b++) sink += fr_tan_bam32_d64((u16)b); - t1 = now_ns(); - double ns_d64 = (t1 - t0) / N; - - t0 = now_ns(); - for (long b = 0; b < N; b++) sink += fr_tan_bam32((u16)b); - t1 = now_ns(); - double ns_direct = (t1 - t0) / N; - - (void)sink; - - /* Stats */ - printf("Wrote build/tan_sweep.csv (65536 rows)\n\n"); - - double peak_old = 0, peak_d64 = 0, peak_dir = 0; - double sum_old = 0, sum_d64 = 0, sum_dir = 0; - int peak_bam_old = 0, peak_bam_d64 = 0, peak_bam_dir = 0; - - for (long b = 0; b < 65536; b++) { - u16 bam = (u16)b; - double rad = (double)bam * 2.0 * M_PI / 65536.0; - double truth = clamp(tan(rad)); - - double e_old = err_pct(truth, to_dbl(fr_tan_bam(bam))); - double e_d64 = err_pct(truth, to_dbl(fr_tan_bam32_d64(bam))); - double e_dir = err_pct(truth, to_dbl(fr_tan_bam32(bam))); - - sum_old += e_old; sum_d64 += e_d64; sum_dir += e_dir; - if (e_old > peak_old) { peak_old = e_old; peak_bam_old = bam; } - if (e_d64 > peak_d64) { peak_d64 = e_d64; peak_bam_d64 = bam; } - if (e_dir > peak_dir) { peak_dir = e_dir; peak_bam_dir = bam; } - } - - printf("| %-24s | %5s | %10s | %10s | %7s | %-24s |\n", - "Implementation", "Table", "Peak Err", "Avg Err", "ns/call", "Peak At"); - printf("| %-24s | %5s | %10s | %10s | %7s | %-24s |\n", - "------------------------", "-----", "----------", "----------", "-------", - "------------------------"); - printf("| %-24s | %5s | %9.4f%% | %9.5f%% | %5.1f | BAM %5d (%6.2f deg) |\n", - "fr_tan_bam (old)", "65u16", - peak_old, sum_old / 65536, ns_old, - peak_bam_old, peak_bam_old * 360.0 / 65536.0); 
- printf("| %-24s | %5s | %9.4f%% | %9.5f%% | %5.1f | BAM %5d (%6.2f deg) |\n", - "fr_tan_bam32_d64 (s/c)", "none", - peak_d64, sum_d64 / 65536, ns_d64, - peak_bam_d64, peak_bam_d64 * 360.0 / 65536.0); - printf("| %-24s | %5s | %9.4f%% | %9.5f%% | %5.1f | BAM %5d (%6.2f deg) |\n", - "fr_tan_bam32 (direct)", "65u32", - peak_dir, sum_dir / 65536, ns_direct, - peak_bam_dir, peak_bam_dir * 360.0 / 65536.0); - - printf("\nOld: 65-entry u16 octant table + reciprocal (div in 2nd octant).\n"); - printf("d64: sin/cos via 129-entry cos table, always s64 div.\n"); - printf("Direct: 65-entry u32 quadrant tan table, lerp with shift, NO div.\n"); - - printf("\nDone.\n"); - return 0; -} diff --git a/tests/test_tan32.c b/tests/test_tan32.c deleted file mode 100644 index ec13184..0000000 --- a/tests/test_tan32.c +++ /dev/null @@ -1,424 +0,0 @@ -/* - * test_tan32.c - Head-to-head comparison of LUT32 tan/atan2 vs current impls - * - * Compares: - * fr_tan_bam32() vs fr_tan_bam() — BAM accuracy + speed - * fr_tan32() vs fr_tan() — radian accuracy - * fr_tan_deg32() vs FR_TanI() — integer-degree accuracy - * fr_atan2_32() vs FR_atan2() — accuracy + speed - * - * Compile: - * cc -Isrc -Wall -Os src/FR_tan32.c src/FR_math.c tests/test_tan32.c -lm -o build/test_tan32 - * - * @author M A Chatterjee - */ - -#include -#include -#include -#include -#include "../src/FR_math.h" - -/* Declarations for the new LUT32 functions (in FR_tan32.c) */ -extern s32 fr_tan_bam32(u16 bam); -extern s32 fr_tan32(s32 rad, u16 radix); -extern s32 fr_tan_deg32(s32 deg, u16 radix); -extern s32 fr_atan2_32(s32 y, s32 x, u16 out_radix); - -/*======================================================= - * Helpers - */ -static double fr2d(s32 val, int radix) { - return (double)val / (double)(1L << radix); -} - -/*======================================================= - * Test 1: Tangent accuracy sweep — all 65536 BAM points - */ -static void test_tan_bam_accuracy(void) -{ - double max_err_old = 0.0, max_err_new = 0.0; - 
double sum_err_old = 0.0, sum_err_new = 0.0; - u16 max_bam_old = 0, max_bam_new = 0; - int count = 0; - u16 bam; - - printf("## Tangent BAM Accuracy (65536 BAM points)\n\n"); - - for (bam = 0; bam < 0xFFFFu; bam++) { - double angle = (double)bam * 2.0 * M_PI / 65536.0; - double ref = tan(angle); - double old_val, new_val, err_old, err_new; - - /* Skip near poles where tan -> infinity (within ~1 deg of 90/270) */ - if (fabs(ref) > 500.0) continue; - - old_val = fr2d(fr_tan_bam(bam), 16); - new_val = fr2d(fr_tan_bam32(bam), 16); - - /* Percentage error relative to reference */ - if (fabs(ref) > 0.001) { - err_old = fabs((old_val - ref) / ref) * 100.0; - err_new = fabs((new_val - ref) / ref) * 100.0; - } else { - /* Near zero, use absolute error scaled to % of 1.0 */ - err_old = fabs(old_val - ref) * 100.0; - err_new = fabs(new_val - ref) * 100.0; - } - - sum_err_old += err_old; - sum_err_new += err_new; - if (err_old > max_err_old) { max_err_old = err_old; max_bam_old = bam; } - if (err_new > max_err_new) { max_err_new = err_new; max_bam_new = bam; } - count++; - } - - printf("| Metric | Current (fr_tan_bam) | LUT32 (fr_tan_bam32) |\n"); - printf("|----------------|----------------------|----------------------|\n"); - printf("| Peak error (%%) | %11.6f | %11.6f |\n", max_err_old, max_err_new); - printf("| Avg error (%%) | %11.6f | %11.6f |\n", sum_err_old / count, sum_err_new / count); - printf("| Peak BAM | 0x%04X | 0x%04X |\n", max_bam_old, max_bam_new); - printf("| Points tested | %6d | %6d |\n", count, count); - printf("\n"); -} - -/*======================================================= - * Test 2: Tangent radian accuracy — sweep at radix 16 - */ -static void test_tan_radian_accuracy(void) -{ - double max_err_old = 0.0, max_err_new = 0.0; - double sum_err_old = 0.0, sum_err_new = 0.0; - int count = 0; - int i; - - printf("## Tangent Radian Accuracy (10000 points, radix 16)\n\n"); - - /* Sweep radians from -pi to pi in 10000 steps */ - for (i = 0; i < 10000; 
i++) { - double angle = -M_PI + 2.0 * M_PI * (double)i / 10000.0; - double ref = tan(angle); - s32 rad16 = (s32)(angle * 65536.0); - double old_val, new_val, err_old, err_new; - - if (fabs(ref) > 500.0) continue; - - old_val = fr2d(fr_tan(rad16, 16), 16); - new_val = fr2d(fr_tan32(rad16, 16), 16); - - if (fabs(ref) > 0.001) { - err_old = fabs((old_val - ref) / ref) * 100.0; - err_new = fabs((new_val - ref) / ref) * 100.0; - } else { - err_old = fabs(old_val - ref) * 100.0; - err_new = fabs(new_val - ref) * 100.0; - } - - sum_err_old += err_old; - sum_err_new += err_new; - if (err_old > max_err_old) max_err_old = err_old; - if (err_new > max_err_new) max_err_new = err_new; - count++; - } - - printf("| Metric | Current (fr_tan) | LUT32 (fr_tan32) |\n"); - printf("|----------------|----------------------|----------------------|\n"); - printf("| Peak error (%%) | %11.6f | %11.6f |\n", max_err_old, max_err_new); - printf("| Avg error (%%) | %11.6f | %11.6f |\n", sum_err_old / count, sum_err_new / count); - printf("| Points tested | %6d | %6d |\n", count, count); - printf("\n"); -} - -/*======================================================= - * Test 3: Tangent integer-degree accuracy — 0..359 degrees - */ -static void test_tan_degree_accuracy(void) -{ - double max_err_old = 0.0, max_err_new = 0.0; - double sum_err_old = 0.0, sum_err_new = 0.0; - int count = 0; - int deg; - - printf("## Tangent Integer-Degree Accuracy (360 degrees)\n\n"); - - for (deg = 0; deg < 360; deg++) { - double angle = (double)deg * M_PI / 180.0; - double ref = tan(angle); - double old_val, new_val, err_old, err_new; - - if (fabs(ref) > 500.0) continue; - - old_val = fr2d(FR_TanI((s16)deg), 16); - new_val = fr2d(fr_tan_deg32((s16)deg, 0), 16); - - if (fabs(ref) > 0.001) { - err_old = fabs((old_val - ref) / ref) * 100.0; - err_new = fabs((new_val - ref) / ref) * 100.0; - } else { - err_old = fabs(old_val - ref) * 100.0; - err_new = fabs(new_val - ref) * 100.0; - } - - sum_err_old += err_old; - 
sum_err_new += err_new; - if (err_old > max_err_old) max_err_old = err_old; - if (err_new > max_err_new) max_err_new = err_new; - count++; - } - - printf("| Metric | Current (FR_TanI) | LUT32 (fr_tan_deg32) |\n"); - printf("|----------------|----------------------|----------------------|\n"); - printf("| Peak error (%%) | %11.6f | %11.6f |\n", max_err_old, max_err_new); - printf("| Avg error (%%) | %11.6f | %11.6f |\n", sum_err_old / count, sum_err_new / count); - printf("| Points tested | %6d | %6d |\n", count, count); - printf("\n"); -} - -/*======================================================= - * Test 4: Tangent speed comparison (BAM) - */ -static void test_tan_speed(void) -{ - volatile s32 sink = 0; - clock_t start, end; - double old_ns, new_ns; - int iters = 1000000; - int i; - - printf("## Tangent Speed (%d iterations)\n\n", iters); - - /* Warm up */ - for (i = 0; i < 1000; i++) sink += fr_tan_bam((u16)i); - - start = clock(); - for (i = 0; i < iters; i++) - sink += fr_tan_bam((u16)(i & 0xFFFF)); - end = clock(); - old_ns = (double)(end - start) / CLOCKS_PER_SEC * 1e9 / iters; - - start = clock(); - for (i = 0; i < iters; i++) - sink += fr_tan_bam32((u16)(i & 0xFFFF)); - end = clock(); - new_ns = (double)(end - start) / CLOCKS_PER_SEC * 1e9 / iters; - - printf("| Metric | Current (fr_tan_bam) | LUT32 (fr_tan_bam32) |\n"); - printf("|----------------|----------------------|----------------------|\n"); - printf("| ns/call | %11.1f | %11.1f |\n", old_ns, new_ns); - printf("\n"); - - (void)sink; -} - -/*======================================================= - * Test 5: atan2 accuracy sweep — angles at multiple radii - */ -static void test_atan2_accuracy(void) -{ - double max_err_old = 0.0, max_err_new = 0.0; - double sum_err_old = 0.0, sum_err_new = 0.0; - int count = 0; - int ri, ai; - static const double radii[] = { 0.1, 1.0, 10.0, 100.0, 1000.0 }; - - printf("## atan2 Accuracy Sweep (5 radii x 65536 angles)\n\n"); - - for (ri = 0; ri < 5; ri++) { - double 
r = radii[ri]; - for (ai = 0; ai < 65536; ai++) { - double angle = (double)ai * 2.0 * M_PI / 65536.0 - M_PI; - double fx = r * cos(angle); - double fy = r * sin(angle); - s32 x = (s32)(fx * 65536.0); - s32 y = (s32)(fy * 65536.0); - double ref = atan2(fy, fx); - double old_val, new_val, err_old, err_new; - - /* Skip degenerate */ - if (x == 0 && y == 0) continue; - - old_val = fr2d(FR_atan2(y, x, 16), 16); - new_val = fr2d(fr_atan2_32(y, x, 16), 16); - - /* Absolute error in radians, wrapped to [-pi, pi] */ - err_old = fabs(old_val - ref); - err_new = fabs(new_val - ref); - /* Handle wraparound near +/-pi: difference > pi means we - * crossed the branch cut; true angular error is 2*pi - diff */ - if (err_old > M_PI) err_old = 2.0 * M_PI - err_old; - if (err_new > M_PI) err_new = 2.0 * M_PI - err_new; - /* Convert to % of pi for reporting */ - err_old = err_old / M_PI * 100.0; - err_new = err_new / M_PI * 100.0; - - sum_err_old += err_old; - sum_err_new += err_new; - if (err_old > max_err_old) max_err_old = err_old; - if (err_new > max_err_new) max_err_new = err_new; - count++; - } - } - - printf("| Metric | Current (FR_atan2) | LUT32 (fr_atan2_32) |\n"); - printf("|---------------------|----------------------|----------------------|\n"); - printf("| Peak error (%% of pi)| %11.6f | %11.6f |\n", max_err_old, max_err_new); - printf("| Avg error (%% of pi) | %11.6f | %11.6f |\n", sum_err_old / count, sum_err_new / count); - printf("| Points tested | %6d | %6d |\n", count, count); - printf("\n"); -} - -/*======================================================= - * Test 6: atan2 speed comparison - */ -static void test_atan2_speed(void) -{ - volatile s32 sink = 0; - clock_t start, end; - double old_ns, new_ns; - int iters = 500000; - int i; - - printf("## atan2 Speed (%d iterations)\n\n", iters); - - /* Pre-compute some x,y pairs */ - s32 xs[256], ys[256]; - for (i = 0; i < 256; i++) { - double angle = (double)i * 2.0 * M_PI / 256.0; - xs[i] = (s32)(10.0 * cos(angle) * 
65536.0); - ys[i] = (s32)(10.0 * sin(angle) * 65536.0); - } - - /* Warm up */ - for (i = 0; i < 256; i++) sink += FR_atan2(ys[i], xs[i], 16); - - start = clock(); - for (i = 0; i < iters; i++) - sink += FR_atan2(ys[i & 0xFF], xs[i & 0xFF], 16); - end = clock(); - old_ns = (double)(end - start) / CLOCKS_PER_SEC * 1e9 / iters; - - start = clock(); - for (i = 0; i < iters; i++) - sink += fr_atan2_32(ys[i & 0xFF], xs[i & 0xFF], 16); - end = clock(); - new_ns = (double)(end - start) / CLOCKS_PER_SEC * 1e9 / iters; - - printf("| Metric | Current (FR_atan2) | LUT32 (fr_atan2_32) |\n"); - printf("|----------------|----------------------|----------------------|\n"); - printf("| ns/call | %11.1f | %11.1f |\n", old_ns, new_ns); - printf("\n"); - - (void)sink; -} - -/*======================================================= - * Test 7: Quick spot checks for correctness - */ -static int test_spot_checks(void) -{ - int fails = 0; - s32 v; - - printf("## Spot Checks\n\n"); - - /* tan(0) = 0 */ - v = fr_tan_bam32(0); - if (v != 0) { printf(" FAIL: tan_bam32(0) = %d, expected 0\n", v); fails++; } - - /* tan(45 deg) = 1.0 = 65536 in s15.16 */ - v = fr_tan_bam32(0x2000); /* 45 deg = 8192 BAM */ - if (abs(v - 65536) > 2) { printf(" FAIL: tan_bam32(45deg) = %d, expected ~65536\n", v); fails++; } - - /* tan(180 deg) = 0 */ - v = fr_tan_bam32(0x8000); - if (v != 0) { printf(" FAIL: tan_bam32(180deg) = %d, expected 0\n", v); fails++; } - - /* tan(90 deg) = pole */ - v = fr_tan_bam32(0x4000); - if (v != FR_TRIG_MAXVAL) { printf(" FAIL: tan_bam32(90deg) = %d, expected %d\n", v, FR_TRIG_MAXVAL); fails++; } - - /* tan(270 deg) = -pole */ - v = fr_tan_bam32(0xC000); - if (v != -FR_TRIG_MAXVAL) { printf(" FAIL: tan_bam32(270deg) = %d, expected %d\n", v, -FR_TRIG_MAXVAL); fails++; } - - /* Radian wrapper: tan(pi/4) = 1.0 */ - { - s32 pi_4 = (s32)(M_PI / 4.0 * 65536.0); - v = fr_tan32(pi_4, 16); - if (abs(v - 65536) > 100) { printf(" FAIL: tan32(pi/4) = %d (%.6f), expected ~65536\n", v, fr2d(v, 
16)); fails++; } - } - - /* Degree wrapper: tan(45) = 1.0 */ - v = fr_tan_deg32(45, 0); - if (abs(v - 65536) > 100) { printf(" FAIL: tan_deg32(45) = %d (%.6f), expected ~65536\n", v, fr2d(v, 16)); fails++; } - - /* Degree wrapper: tan(0) = 0 */ - v = fr_tan_deg32(0, 0); - if (v != 0) { printf(" FAIL: tan_deg32(0) = %d, expected 0\n", v); fails++; } - - /* atan2(0, 1) = 0 */ - v = fr_atan2_32(0, 65536, 16); - if (v != 0) { printf(" FAIL: atan2_32(0,1) = %d, expected 0\n", v); fails++; } - - /* atan2(1, 0) = pi/2 */ - { - s32 expected = FR_CHRDX(FR_kQ2RAD, FR_kPREC, 16); - v = fr_atan2_32(65536, 0, 16); - if (abs(v - expected) > 2) { printf(" FAIL: atan2_32(1,0) = %d, expected ~%d\n", v, expected); fails++; } - } - - /* atan2(1, 1) = pi/4 */ - { - double ref = M_PI / 4.0; - s32 expected = (s32)(ref * 65536.0); - v = fr_atan2_32(65536, 65536, 16); - if (abs(v - expected) > 200) { printf(" FAIL: atan2_32(1,1) = %d (%.6f), expected ~%d (%.6f)\n", - v, fr2d(v, 16), expected, ref); fails++; } - } - - /* atan2(-1, -1) = -3*pi/4 */ - { - double ref = -3.0 * M_PI / 4.0; - s32 expected = (s32)(ref * 65536.0); - v = fr_atan2_32(-65536, -65536, 16); - if (abs(v - expected) > 200) { printf(" FAIL: atan2_32(-1,-1) = %d (%.6f), expected ~%d (%.6f)\n", - v, fr2d(v, 16), expected, ref); fails++; } - } - - if (fails == 0) - printf(" All spot checks PASSED\n"); - else - printf(" %d spot check(s) FAILED\n", fails); - printf("\n"); - - return fails; -} - -/*======================================================= - * Main - */ -int main(void) -{ - int fails; - - printf("# FR_tan32 Head-to-Head Comparison Report\n\n"); - - fails = test_spot_checks(); - test_tan_bam_accuracy(); - test_tan_radian_accuracy(); - test_tan_degree_accuracy(); - test_tan_speed(); - test_atan2_accuracy(); - test_atan2_speed(); - - printf("## Summary\n\n"); - printf("Design notes:\n"); - printf(" - tan: sin/cos from the existing 129-entry cosine table (258B, already in ROM)\n"); - printf(" No extra tan table needed 
for the forward path. One s64 division per call.\n"); - printf(" Current uses octant table (130B) + reciprocal division for [45,90] deg.\n\n"); - printf(" - atan2: binary search on 129-entry u32 tan table (516B) + quadrant mapping\n"); - printf(" Current uses hypot_fast8 -> asin/acos chain (more code, no extra table)\n\n"); - printf(" - Tan table (516B) needed only for atan2. Could be omitted if atan2 not used.\n\n"); - - return fails ? 1 : 0; -} diff --git a/tests/test_tan32_peaks.c b/tests/test_tan32_peaks.c deleted file mode 100644 index 32e0437..0000000 --- a/tests/test_tan32_peaks.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - * test_tan32_peaks.c - Find peak error locations and print ±20 entries around them - */ -#include -#include -#include -#include "../src/FR_math.h" - -extern s32 fr_tan_bam32(u16 bam); -extern s32 fr_tan32(s32 rad, u16 radix); -extern s32 fr_tan_deg32(s32 deg, u16 radix); - -static double fr2d(s32 val, int radix) { - return (double)val / (double)(1L << radix); -} - -static double tan_err(double val, double ref) { - if (fabs(ref) > 0.001) - return fabs((val - ref) / ref) * 100.0; - else - return fabs(val - ref) * 100.0; -} - -/*======================================================= - * BAM peak finder + neighborhood dump - */ -static void peak_tan_bam(void) -{ - s32 bam; - s32 old_peak_bam = 0, new_peak_bam = 0; - double old_peak = 0, new_peak = 0; - - /* Pass 1: find peaks */ - for (bam = 0; bam < 65536; bam++) { - double angle = (double)bam * 2.0 * M_PI / 65536.0; - double ref = tan(angle); - if (fabs(ref) > 500.0) continue; - double ov = fr2d(fr_tan_bam((u16)bam), 16); - double nv = fr2d(fr_tan_bam32((u16)bam), 16); - double oe = tan_err(ov, ref); - double ne = tan_err(nv, ref); - if (oe > old_peak) { old_peak = oe; old_peak_bam = bam; } - if (ne > new_peak) { new_peak = ne; new_peak_bam = bam; } - } - - printf("## tan BAM: OLD peak at BAM %d (%.4f deg), NEW peak at BAM %d (%.4f deg)\n\n", - (int)old_peak_bam, old_peak_bam * 360.0 / 65536.0, 
- (int)new_peak_bam, new_peak_bam * 360.0 / 65536.0); - - /* Pass 2: dump ±20 around OLD peak */ - printf("### OLD peak neighborhood (BAM %d ± 20)\n\n", (int)old_peak_bam); - printf("| BAM | deg | ref (libm) | OLD result | OLD err %% | NEW result | NEW err %% |\n"); - printf("|-------|-----------|----------------|----------------|-------------|----------------|-------------|\n"); - for (bam = old_peak_bam - 20; bam <= old_peak_bam + 20; bam++) { - u16 b = (u16)(bam & 0xFFFF); - double angle = (double)b * 2.0 * M_PI / 65536.0; - double ref = tan(angle); - if (fabs(ref) > 500.0) { printf("| %5d | %9.4f | (pole) | | | | |\n", bam, b * 360.0 / 65536.0); continue; } - double ov = fr2d(fr_tan_bam(b), 16); - double nv = fr2d(fr_tan_bam32(b), 16); - printf("| %5d | %9.4f | %14.8f | %14.8f | %11.6f | %14.8f | %11.6f |%s\n", - bam, b * 360.0 / 65536.0, ref, ov, tan_err(ov, ref), nv, tan_err(nv, ref), - (bam == old_peak_bam) ? " <-- OLD PEAK" : (bam == new_peak_bam) ? " <-- NEW PEAK" : ""); - } - - if (abs((int)(new_peak_bam - old_peak_bam)) > 25) { - printf("\n### NEW peak neighborhood (BAM %d ± 20)\n\n", (int)new_peak_bam); - printf("| BAM | deg | ref (libm) | OLD result | OLD err %% | NEW result | NEW err %% |\n"); - printf("|-------|-----------|----------------|----------------|-------------|----------------|-------------|\n"); - for (bam = new_peak_bam - 20; bam <= new_peak_bam + 20; bam++) { - u16 b = (u16)(bam & 0xFFFF); - double angle = (double)b * 2.0 * M_PI / 65536.0; - double ref = tan(angle); - if (fabs(ref) > 500.0) { printf("| %5d | %9.4f | (pole) | | | | |\n", bam, b * 360.0 / 65536.0); continue; } - double ov = fr2d(fr_tan_bam(b), 16); - double nv = fr2d(fr_tan_bam32(b), 16); - printf("| %5d | %9.4f | %14.8f | %14.8f | %11.6f | %14.8f | %11.6f |%s\n", - bam, b * 360.0 / 65536.0, ref, ov, tan_err(ov, ref), nv, tan_err(nv, ref), - (bam == new_peak_bam) ? 
" <-- NEW PEAK" : ""); - } - } - - printf("\n"); -} - -/*======================================================= - * Radian peak finder + neighborhood dump - */ -static void peak_tan_rad(void) -{ - s32 rad16; - s32 old_peak_r = 0, new_peak_r = 0; - double old_peak = 0, new_peak = 0; - - for (rad16 = -65536; rad16 <= 65535; rad16++) { - double angle = (double)rad16 / 65536.0; - double ref = tan(angle); - if (fabs(ref) > 500.0) continue; - double ov = fr2d(fr_tan(rad16, 16), 16); - double nv = fr2d(fr_tan32(rad16, 16), 16); - double oe = tan_err(ov, ref); - double ne = tan_err(nv, ref); - if (oe > old_peak) { old_peak = oe; old_peak_r = rad16; } - if (ne > new_peak) { new_peak = ne; new_peak_r = rad16; } - } - - printf("## tan Radian: OLD peak at r16=%d (%.6f rad, %.4f deg), NEW peak at r16=%d (%.6f rad, %.4f deg)\n\n", - (int)old_peak_r, old_peak_r / 65536.0, old_peak_r / 65536.0 * 180.0 / M_PI, - (int)new_peak_r, new_peak_r / 65536.0, new_peak_r / 65536.0 * 180.0 / M_PI); - - /* dump around OLD peak */ - printf("### OLD peak neighborhood (r16=%d ± 20)\n\n", (int)old_peak_r); - printf("| r16 | rad | deg | ref (libm) | OLD result | OLD err %% | NEW result | NEW err %% |\n"); - printf("|--------|-------------|-----------|----------------|----------------|-------------|----------------|-------------|\n"); - for (rad16 = old_peak_r - 20; rad16 <= old_peak_r + 20; rad16++) { - double angle = (double)rad16 / 65536.0; - double ref = tan(angle); - if (fabs(ref) > 500.0) continue; - double ov = fr2d(fr_tan(rad16, 16), 16); - double nv = fr2d(fr_tan32(rad16, 16), 16); - printf("| %6d | %11.7f | %9.4f | %14.8f | %14.8f | %11.6f | %14.8f | %11.6f |%s\n", - (int)rad16, angle, angle * 180.0 / M_PI, ref, ov, tan_err(ov, ref), nv, tan_err(nv, ref), - (rad16 == old_peak_r) ? " <-- OLD PEAK" : (rad16 == new_peak_r) ? 
" <-- NEW PEAK" : ""); - } - - if (abs((int)(new_peak_r - old_peak_r)) > 25) { - printf("\n### NEW peak neighborhood (r16=%d ± 20)\n\n", (int)new_peak_r); - printf("| r16 | rad | deg | ref (libm) | OLD result | OLD err %% | NEW result | NEW err %% |\n"); - printf("|--------|-------------|-----------|----------------|----------------|-------------|----------------|-------------|\n"); - for (rad16 = new_peak_r - 20; rad16 <= new_peak_r + 20; rad16++) { - double angle = (double)rad16 / 65536.0; - double ref = tan(angle); - if (fabs(ref) > 500.0) continue; - double ov = fr2d(fr_tan(rad16, 16), 16); - double nv = fr2d(fr_tan32(rad16, 16), 16); - printf("| %6d | %11.7f | %9.4f | %14.8f | %14.8f | %11.6f | %14.8f | %11.6f |%s\n", - (int)rad16, angle, angle * 180.0 / M_PI, ref, ov, tan_err(ov, ref), nv, tan_err(nv, ref), - (rad16 == new_peak_r) ? " <-- NEW PEAK" : ""); - } - } - - printf("\n"); -} - -/*======================================================= - * Degree peak finder + neighborhood dump - */ -static void peak_tan_deg(void) -{ - s16 deg; - s16 old_peak_d = 0, new_peak_d = 0; - double old_peak = 0, new_peak = 0; - - for (deg = -180; deg <= 179; deg++) { - double ref = tan((double)deg * M_PI / 180.0); - if (fabs(ref) > 500.0) continue; - double ov = fr2d(FR_TanI(deg), 16); - double nv = fr2d(fr_tan_deg32(deg, 0), 16); - double oe = tan_err(ov, ref); - double ne = tan_err(nv, ref); - if (oe > old_peak) { old_peak = oe; old_peak_d = deg; } - if (ne > new_peak) { new_peak = ne; new_peak_d = deg; } - } - - printf("## tan Degree: OLD peak at %d deg, NEW peak at %d deg\n\n", - (int)old_peak_d, (int)new_peak_d); - - /* dump full range around both peaks, ±20 deg */ - s16 lo = old_peak_d < new_peak_d ? old_peak_d : new_peak_d; - s16 hi = old_peak_d > new_peak_d ? old_peak_d : new_peak_d; - lo = (lo - 20 < -180) ? -180 : lo - 20; - hi = (hi + 20 > 179) ? 179 : hi + 20; - - printf("### Neighborhood (%d .. 
%d deg)\n\n", (int)lo, (int)hi); - printf("| deg | ref (libm) | OLD result | OLD err %% | NEW result | NEW err %% |\n"); - printf("|------|----------------|----------------|-------------|----------------|-------------|\n"); - for (deg = lo; deg <= hi; deg++) { - double ref = tan((double)deg * M_PI / 180.0); - if (fabs(ref) > 500.0) { printf("| %4d | (pole) | | | | |\n", (int)deg); continue; } - double ov = fr2d(FR_TanI(deg), 16); - double nv = fr2d(fr_tan_deg32(deg, 0), 16); - printf("| %4d | %14.8f | %14.8f | %11.6f | %14.8f | %11.6f |%s\n", - (int)deg, ref, ov, tan_err(ov, ref), nv, tan_err(nv, ref), - (deg == old_peak_d && deg == new_peak_d) ? " <-- BOTH PEAK" : - (deg == old_peak_d) ? " <-- OLD PEAK" : - (deg == new_peak_d) ? " <-- NEW PEAK" : ""); - } - printf("\n"); -} - -int main(void) -{ - printf("# Peak Error Neighborhoods for Tangent Functions\n\n"); - peak_tan_bam(); - peak_tan_rad(); - peak_tan_deg(); - return 0; -} diff --git a/tests/test_tan32_sweep.c b/tests/test_tan32_sweep.c deleted file mode 100644 index 99dc83f..0000000 --- a/tests/test_tan32_sweep.c +++ /dev/null @@ -1,318 +0,0 @@ -/* - * test_tan32_sweep.c - Comprehensive -65536..+65536 sweep for all tan/atan functions - * - * Generates a single comparison table: old vs new, BAM / radian / degree, - * with peak error, avg error, and speed for each function. 
- * - * Compile: - * cc -Isrc -Wall -Os src/FR_tan32.c src/FR_math.c tests/test_tan32_sweep.c -lm -o build/test_tan32_sweep - * - * @author M A Chatterjee - */ - -#include -#include -#include -#include -#include "../src/FR_math.h" - -extern s32 fr_tan_bam32(u16 bam); -extern s32 fr_tan32(s32 rad, u16 radix); -extern s32 fr_tan_deg32(s32 deg, u16 radix); -extern s32 fr_atan2_32(s32 y, s32 x, u16 out_radix); - -static double fr2d(s32 val, int radix) { - return (double)val / (double)(1L << radix); -} - -typedef struct { - double peak_err; - double sum_err; - int count; -} stats_t; - -static void stats_init(stats_t *s) { s->peak_err = 0; s->sum_err = 0; s->count = 0; } -static void stats_add(stats_t *s, double err) { - if (err > s->peak_err) s->peak_err = err; - s->sum_err += err; - s->count++; -} -static double stats_avg(stats_t *s) { return s->count > 0 ? s->sum_err / s->count : 0; } - -/*======================================================= - * Speed measurement helper - */ -static double measure_ns(void (*fn)(volatile s32 *sink, int n), int n) { - volatile s32 sink = 0; - clock_t start, end; - /* warm up */ - fn(&sink, n / 10); - start = clock(); - fn(&sink, n); - end = clock(); - return (double)(end - start) / CLOCKS_PER_SEC * 1e9 / n; -} - -/* Speed test functions */ -static void speed_tan_bam_old(volatile s32 *sink, int n) { - int i; for (i = 0; i < n; i++) *sink += fr_tan_bam((u16)(i & 0xFFFF)); -} -static void speed_tan_bam_new(volatile s32 *sink, int n) { - int i; for (i = 0; i < n; i++) *sink += fr_tan_bam32((u16)(i & 0xFFFF)); -} -static void speed_tan_rad_old(volatile s32 *sink, int n) { - int i; for (i = 0; i < n; i++) *sink += fr_tan((s32)((i * 7) - n * 3), 16); -} -static void speed_tan_rad_new(volatile s32 *sink, int n) { - int i; for (i = 0; i < n; i++) *sink += fr_tan32((s32)((i * 7) - n * 3), 16); -} -static void speed_tan_deg_old(volatile s32 *sink, int n) { - int i; for (i = 0; i < n; i++) *sink += FR_TanI((s16)(i % 360)); -} -static void 
speed_tan_deg_new(volatile s32 *sink, int n) { - int i; for (i = 0; i < n; i++) *sink += fr_tan_deg32((s16)(i % 360), 0); -} - -static s32 g_xs[256], g_ys[256]; -static void init_atan_data(void) { - int i; - for (i = 0; i < 256; i++) { - double a = (double)i * 2.0 * M_PI / 256.0; - g_xs[i] = (s32)(10.0 * cos(a) * 65536.0); - g_ys[i] = (s32)(10.0 * sin(a) * 65536.0); - } -} -static void speed_atan2_old(volatile s32 *sink, int n) { - int i; for (i = 0; i < n; i++) *sink += FR_atan2(g_ys[i & 0xFF], g_xs[i & 0xFF], 16); -} -static void speed_atan2_new(volatile s32 *sink, int n) { - int i; for (i = 0; i < n; i++) *sink += fr_atan2_32(g_ys[i & 0xFF], g_xs[i & 0xFF], 16); -} -static void speed_atan_old(volatile s32 *sink, int n) { - int i; for (i = 0; i < n; i++) *sink += FR_atan((s32)((i * 13) - n * 6), 16, 16); -} -static void speed_atan_new(volatile s32 *sink, int n) { - /* FR_atan(x, r, or) = FR_atan2(x, 1< 500.0) continue; - - ov = fr2d(fr_tan_bam((u16)bam), 16); - nv = fr2d(fr_tan_bam32((u16)bam), 16); - - if (fabs(ref) > 0.001) { - oe = fabs((ov - ref) / ref) * 100.0; - ne = fabs((nv - ref) / ref) * 100.0; - } else { - oe = fabs(ov - ref) * 100.0; - ne = fabs(nv - ref) * 100.0; - } - stats_add(old_s, oe); - stats_add(new_s, ne); - } -} - -static void sweep_tan_rad(stats_t *old_s, stats_t *new_s) -{ - s32 rad16; - stats_init(old_s); - stats_init(new_s); - - /* Sweep s15.16 radians from -65536 to +65535 (= -1.0 to +1.0 rad ≈ ±57 deg). - * Step by 1 LSB = full 131072-point sweep. 
*/ - for (rad16 = -65536; rad16 <= 65535; rad16++) { - double angle = (double)rad16 / 65536.0; - double ref = tan(angle); - double ov, nv, oe, ne; - if (fabs(ref) > 500.0) continue; - - ov = fr2d(fr_tan(rad16, 16), 16); - nv = fr2d(fr_tan32(rad16, 16), 16); - - if (fabs(ref) > 0.001) { - oe = fabs((ov - ref) / ref) * 100.0; - ne = fabs((nv - ref) / ref) * 100.0; - } else { - oe = fabs(ov - ref) * 100.0; - ne = fabs(nv - ref) * 100.0; - } - stats_add(old_s, oe); - stats_add(new_s, ne); - } -} - -static void sweep_tan_deg(stats_t *old_s, stats_t *new_s) -{ - s16 deg; - stats_init(old_s); - stats_init(new_s); - - for (deg = -180; deg <= 179; deg++) { - double ref = tan((double)deg * M_PI / 180.0); - double ov, nv, oe, ne; - if (fabs(ref) > 500.0) continue; - - ov = fr2d(FR_TanI(deg), 16); - nv = fr2d(fr_tan_deg32(deg, 0), 16); - - if (fabs(ref) > 0.001) { - oe = fabs((ov - ref) / ref) * 100.0; - ne = fabs((nv - ref) / ref) * 100.0; - } else { - oe = fabs(ov - ref) * 100.0; - ne = fabs(nv - ref) * 100.0; - } - stats_add(old_s, oe); - stats_add(new_s, ne); - } -} - -/*======================================================= - * Atan sweeps - */ -static void sweep_atan2(stats_t *old_s, stats_t *new_s) -{ - int ri, ai; - static const double radii[] = { 0.1, 1.0, 10.0, 100.0, 1000.0 }; - stats_init(old_s); - stats_init(new_s); - - for (ri = 0; ri < 5; ri++) { - double r = radii[ri]; - for (ai = 0; ai < 65536; ai++) { - double angle = (double)ai * 2.0 * M_PI / 65536.0 - M_PI; - double fx = r * cos(angle), fy = r * sin(angle); - s32 x = (s32)(fx * 65536.0), y = (s32)(fy * 65536.0); - double ref = atan2(fy, fx); - double ov, nv, oe, ne; - if (x == 0 && y == 0) continue; - - ov = fr2d(FR_atan2(y, x, 16), 16); - nv = fr2d(fr_atan2_32(y, x, 16), 16); - - oe = fabs(ov - ref); ne = fabs(nv - ref); - if (oe > M_PI) oe = 2.0 * M_PI - oe; - if (ne > M_PI) ne = 2.0 * M_PI - ne; - oe = oe / M_PI * 100.0; - ne = ne / M_PI * 100.0; - - stats_add(old_s, oe); - stats_add(new_s, ne); - } - } 
-} - -static void sweep_atan(stats_t *old_s, stats_t *new_s) -{ - s32 x16; - stats_init(old_s); - stats_init(new_s); - - /* Sweep atan input from -65536 to +65535 (= -1.0 to +1.0 in s15.16). - * Step by 8 to keep runtime reasonable (16384 points). - * Error metric: absolute angular error as % of pi/2 (atan range). */ - for (x16 = -65536; x16 <= 65535; x16 += 8) { - double xf = (double)x16 / 65536.0; - double ref = atan(xf); - double ov, nv, oe, ne; - - ov = fr2d(FR_atan(x16, 16, 16), 16); - nv = fr2d(fr_atan2_32(x16, 65536, 16), 16); - - /* Use absolute angular error / (pi/2) * 100, same approach as atan2 */ - oe = fabs(ov - ref) / (M_PI / 2.0) * 100.0; - ne = fabs(nv - ref) / (M_PI / 2.0) * 100.0; - - stats_add(old_s, oe); - stats_add(new_s, ne); - } -} - -/*======================================================= - * Main - */ -int main(void) -{ - stats_t old_s, new_s; - double old_ns, new_ns; - int N = 1000000; - - init_atan_data(); - - printf("# Comprehensive Function Comparison: Old vs New\n\n"); - printf("Sweep range: full domain for each input type\n"); - printf("Error metric: relative %% (or absolute*100 near zero)\n"); - printf("Speed: ns/call on this platform\n\n"); - - printf("## Tangent Functions\n\n"); - printf("| Function | Impl | Sweep Range | Points | Peak Err %% | Avg Err %% | ns/call |\n"); - printf("|--------------------|-------|-------------------|---------|-------------|-------------|--------:|\n"); - - sweep_tan_bam(&old_s, &new_s); - old_ns = measure_ns(speed_tan_bam_old, N); - new_ns = measure_ns(speed_tan_bam_new, N); - printf("| tan_bam (BAM) | OLD | 0..65535 BAM | %7d | %11.6f | %11.6f | %5.1f |\n", - old_s.count, old_s.peak_err, stats_avg(&old_s), old_ns); - printf("| tan_bam32 (BAM) | NEW | 0..65535 BAM | %7d | %11.6f | %11.6f | %5.1f |\n", - new_s.count, new_s.peak_err, stats_avg(&new_s), new_ns); - - sweep_tan_rad(&old_s, &new_s); - old_ns = measure_ns(speed_tan_rad_old, N); - new_ns = measure_ns(speed_tan_rad_new, N); - printf("| 
fr_tan (rad@r16) | OLD | -65536..+65535 r16| %7d | %11.6f | %11.6f | %5.1f |\n", - old_s.count, old_s.peak_err, stats_avg(&old_s), old_ns); - printf("| fr_tan32 (rad@r16) | NEW | -65536..+65535 r16| %7d | %11.6f | %11.6f | %5.1f |\n", - new_s.count, new_s.peak_err, stats_avg(&new_s), new_ns); - - sweep_tan_deg(&old_s, &new_s); - old_ns = measure_ns(speed_tan_deg_old, N); - new_ns = measure_ns(speed_tan_deg_new, N); - printf("| FR_TanI (deg) | OLD | -180..+179 deg | %7d | %11.6f | %11.6f | %5.1f |\n", - old_s.count, old_s.peak_err, stats_avg(&old_s), old_ns); - printf("| fr_tan_deg32 (deg) | NEW | -180..+179 deg | %7d | %11.6f | %11.6f | %5.1f |\n", - new_s.count, new_s.peak_err, stats_avg(&new_s), new_ns); - - printf("\n## Inverse Tangent Functions\n\n"); - printf("| Function | Impl | Sweep Range | Points | Peak Err %% | Avg Err %% | ns/call |\n"); - printf("|--------------------|-------|-------------------|---------|-------------|-------------|--------:|\n"); - - sweep_atan2(&old_s, &new_s); - old_ns = measure_ns(speed_atan2_old, N / 2); - new_ns = measure_ns(speed_atan2_new, N / 2); - printf("| FR_atan2 (s15.16) | OLD | 5 radii x 65536 | %7d | %11.6f | %11.6f | %5.1f |\n", - old_s.count, old_s.peak_err, stats_avg(&old_s), old_ns); - printf("| fr_atan2_32(s15.16)| NEW | 5 radii x 65536 | %7d | %11.6f | %11.6f | %5.1f |\n", - new_s.count, new_s.peak_err, stats_avg(&new_s), new_ns); - - sweep_atan(&old_s, &new_s); - old_ns = measure_ns(speed_atan_old, N / 2); - new_ns = measure_ns(speed_atan_new, N / 2); - printf("| FR_atan (s15.16) | OLD | -65536..+65535 /8 | %7d | %11.6f | %11.6f | %5.1f |\n", - old_s.count, old_s.peak_err, stats_avg(&old_s), old_ns); - printf("| atan2_32(x,1) eq. 
| NEW | -65536..+65535 /8 | %7d | %11.6f | %11.6f | %5.1f |\n", - new_s.count, new_s.peak_err, stats_avg(&new_s), new_ns); - - printf("\n## Notes\n\n"); - printf("- BAM sweep: 0..65535 (full circle, excludes |tan|>500 near poles)\n"); - printf("- Radian sweep: -65536..+65535 at radix 16 = -1.0..+1.0 rad = +/-57.3 deg\n"); - printf("- Degree sweep: -180..+179 integer degrees\n"); - printf("- atan2 error: %% of pi (angular error / pi * 100)\n"); - printf("- atan error: absolute angular error / (pi/2) * 100%%\n"); - printf("- atan2_32(x,1) is used as the NEW atan since it's equivalent to atan(x)\n"); - - return 0; -} diff --git a/tests/test_tdd.cpp b/tests/test_tdd.cpp index f1d7c5d..da66323 100644 --- a/tests/test_tdd.cpp +++ b/tests/test_tdd.cpp @@ -2567,7 +2567,7 @@ static void section_accuracy_table(void) { double deg = actual_angle * 180.0 / M_PI; stats_add(&st, deg, frd(fr_tan(rad_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(actual_angle)), TAN_CLAMP); } - SWEEP_ROW("fr_tan", "(s32 rad, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "near-π bypass; s64 lerp near poles"); + SWEEP_ROW("fr_tan", "(s32 rad, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "sign extract + small-angle bypass at 0/pi/2pi; r24 cot(d)~1/d near poles; BAM table elsewhere"); } /* FR_SinI */ { diff --git a/tools/trig_neighborhood.cpp b/tools/trig_neighborhood.cpp index 3266515..4c275d8 100644 --- a/tools/trig_neighborhood.cpp +++ b/tools/trig_neighborhood.cpp @@ -181,18 +181,24 @@ static s32 eval(Func f, double val, int radix, int out_radix, case F_SIN: { s32 rad_fp = tofix(rad, radix); *input_fp = rad_fp; + double actual_rad = (double)rad_fp / (double)(1 << radix); + *expected = qN(sin(actual_rad), 16); raw = fr_sin(rad_fp, (u16)radix); break; } case F_COS: { s32 rad_fp = tofix(rad, radix); *input_fp = rad_fp; + double actual_rad = (double)rad_fp / (double)(1 << radix); + *expected = qN(cos(actual_rad), 16); raw = fr_cos(rad_fp, (u16)radix); break; } case F_TAN: { s32 rad_fp = 
tofix(rad, radix); *input_fp = rad_fp; + double actual_rad = (double)rad_fp / (double)(1 << radix); + *expected = qN(tan_ref(actual_rad), 16); raw = fr_tan(rad_fp, (u16)radix); break; } @@ -211,18 +217,29 @@ static s32 eval(Func f, double val, int radix, int out_radix, case F_SIN_DEG: { s32 deg_fp = tofix(val, radix); *input_fp = deg_fp; + double actual_deg = (double)deg_fp / (double)(1 << radix); + double actual_rad2 = actual_deg * M_PI / 180.0; + if (is_sin(f)) *expected = qN(sin(actual_rad2), 16); + else if (is_cos(f)) *expected = qN(cos(actual_rad2), 16); + else *expected = qN(tan_ref(actual_rad2), 16); raw = fr_sin_deg(deg_fp, (u16)radix); break; } case F_COS_DEG: { s32 deg_fp = tofix(val, radix); *input_fp = deg_fp; + double actual_deg = (double)deg_fp / (double)(1 << radix); + double actual_rad2 = actual_deg * M_PI / 180.0; + *expected = qN(cos(actual_rad2), 16); raw = fr_cos_deg(deg_fp, (u16)radix); break; } case F_TAN_DEG: { s32 deg_fp = tofix(val, radix); *input_fp = deg_fp; + double actual_deg = (double)deg_fp / (double)(1 << radix); + double actual_rad2 = actual_deg * M_PI / 180.0; + *expected = qN(tan_ref(actual_rad2), 16); raw = fr_tan_deg(deg_fp, (u16)radix); break; } From b32940c77821a8159b985c408bcfc415156334f8 Mon Sep 17 00:00:00 2001 From: deftio Date: Mon, 4 May 2026 17:52:43 -0700 Subject: [PATCH 5/7] docs and examples update, src clean up, accuracy metrics and tune up --- README.md | 86 ++++--- agents.md | 17 +- docker/Dockerfile | 55 ++-- docs/README.md | 36 +-- docs/api-reference.md | 3 + docs/building.md | 4 + docs/examples.md | 71 ++++-- docs/releases.md | 13 +- examples/README.md | 27 +- examples/arduino_smoke/README.md | 36 +++ examples/basic-math/README.md | 63 +++++ examples/fixed-point-basics/Makefile | 29 +++ examples/fixed-point-basics/README.md | 63 +++++ .../fixed-point-basics/fixed_point_basics | Bin 0 -> 35600 bytes .../fixed-point-basics/fixed_point_basics.cpp | 236 ++++++++++++++++++ examples/log-exp-curves/Makefile | 29 +++ 
examples/log-exp-curves/README.md | 63 +++++ examples/log-exp-curves/log_exp_curves | Bin 0 -> 35824 bytes examples/log-exp-curves/log_exp_curves.cpp | 192 ++++++++++++++ examples/posix-example/Makefile | 29 +++ examples/posix-example/README.md | 46 ++++ examples/trig-accuracy/Makefile | 57 +++++ examples/trig-accuracy/README.md | 80 ++++++ examples/trig-accuracy/trig_accuracy | Bin 0 -> 36960 bytes examples/trig-accuracy/trig_accuracy.cpp | 139 +++++++++++ examples/trig-functions/README.md | 55 ++++ examples/wave-generators/README.md | 57 +++++ examples/waveform-synth/Makefile | 33 +++ examples/waveform-synth/README.md | 76 ++++++ examples/waveform-synth/waveform_synth | Bin 0 -> 35808 bytes examples/waveform-synth/waveform_synth.cpp | 206 +++++++++++++++ keywords.txt | 1 + llms.txt | 14 +- makefile | 42 +++- pages/assets/site.js | 31 ++- pages/guide/api-reference.html | 3 + pages/guide/building.html | 4 + pages/guide/examples.html | 93 +++++-- pages/index.html | 32 +-- pages/releases.html | 25 +- pages/version.json | 1 + release_notes.md | 14 +- scripts/crossbuild_sizes.sh | 112 ++++----- scripts/sync_version.sh | 34 ++- tests/test_tdd.cpp | 30 +-- tools/make_release.sh | 13 +- 46 files changed, 1973 insertions(+), 277 deletions(-) create mode 100644 examples/arduino_smoke/README.md create mode 100644 examples/basic-math/README.md create mode 100644 examples/fixed-point-basics/Makefile create mode 100644 examples/fixed-point-basics/README.md create mode 100755 examples/fixed-point-basics/fixed_point_basics create mode 100644 examples/fixed-point-basics/fixed_point_basics.cpp create mode 100644 examples/log-exp-curves/Makefile create mode 100644 examples/log-exp-curves/README.md create mode 100755 examples/log-exp-curves/log_exp_curves create mode 100644 examples/log-exp-curves/log_exp_curves.cpp create mode 100644 examples/posix-example/Makefile create mode 100644 examples/posix-example/README.md create mode 100644 examples/trig-accuracy/Makefile create mode 
100644 examples/trig-accuracy/README.md create mode 100755 examples/trig-accuracy/trig_accuracy create mode 100644 examples/trig-accuracy/trig_accuracy.cpp create mode 100644 examples/trig-functions/README.md create mode 100644 examples/wave-generators/README.md create mode 100644 examples/waveform-synth/Makefile create mode 100644 examples/waveform-synth/README.md create mode 100755 examples/waveform-synth/waveform_synth create mode 100644 examples/waveform-synth/waveform_synth.cpp create mode 100644 pages/version.json diff --git a/README.md b/README.md index 07c7ce5..0cf1c94 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ See: **[Documentation & Guide](https://deftio.github.io/fr_math/)** — for API reference, examples, fixed-point primer, build instructions. -**FR_Math** is a compact, integer-only fixed-point math library built for systems where floating point is too slow, too big, or unavailable. Designed for embedded targets ranging from legacy 16 MHz 68k processors to modern Cortex-M and RISC-V cores, it provides a full suite of math primitives — trigonometry, logarithms, roots, transforms, and signal generators — while remaining deterministic, portable, and small. Optional print utility functions are also provided for pretty printing out fixed point numbers over serial links or buffers. +**FR_Math** is a compact, integer-only fixed-point math library built for systems where floating point is too slow, too big, or unavailable. Designed for embedded targets ranging from legacy 16 MHz 68k processors to modern Cortex-M and RISC-V cores, it provides a full suite of math primitives — trigonometry, logarithms, roots, transforms, and signal generators — while remaining deterministic, portable, and small. Unlike most fixed-point libraries, FR_Math lets the caller choose the binary point (radix) per operation, trading precision and range explicitly instead of locking into a single format. FR_math is Pure C (C99/C11/C17,with) with C++ wrappers.
Compiles under Arduino, PlatformIO, Espressif, many older embedded targets. @@ -28,28 +28,31 @@ radix — Q16.16 is just the reference point for the table. At other radixes (3-bit, 24-bit, etc.) accuracy will differ due to the number of fractional bits available. -| Function | Max err (%)* | Avg err (%) | Note | -| --- | --- | --- | --- | -| sin/cos (BAM) | 0.1526 | 0.0030 | fr_sin_bam/fr_cos_bam direct; 129-entry table | -| sin/cos (deg) | 0.1526 | 0.0029 | FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM | -| sin/cos (rad) | 0.1828 | 0.0033 | fr_sin/fr_cos via fr_rad_to_bam ±2π r16 | -| tan (BAM) | 0.5823 | 0.0008 | fr_tan_bam 65536-pt full; ±maxint at poles | -| tan (deg) | 0.5311 | 0.0008 | fr_tan_deg ±360° s15.16 full; sat at poles | -| tan (rad) | 0.0386 | 0.0001 | fr_tan ±2π r16; r24 pole bypass | -| asin / acos | 0.7771 | 0.0280 | 65536-pt; sqrt approx near boundary | -| atan2 | 0.2564 | 0.0237 | 65536x5 radii; asin/acos+hypot_fast8 | -| atan | 0.2425 | 0.0155 | 20001-pt full sweep [-10,10]; via FR_atan2 | + +| Function | Max err (%)*| Avg err (%) | Note | +|---|---:|---:|---| +| sin/cos (BAM) | 0.1526 | 0.0030 | very fast binary angle trig | +| sin/cos (deg) | 0.1526 | 0.0029 | degree input trig fns | +| sin/cos (rad) | 0.1828 | 0.0033 | radian (traditional) trig | +| tan (BAM) | 0.5823 | 0.0008 | binary angle tangent; ±maxint at poles | +| tan (deg) | 0.5311 | 0.0008 | degree input tangent; saturated at poles | +| tan (rad) | 0.0386 | 0.0001 | radian (traditional) tangent | +| asin / acos | 0.7771 | 0.0280 | reverse trig, radian output | +| atan2 | 0.2564 | 0.0237 | reverse tangent, always safe | +| atan | 0.2425 | 0.0155 | reverse tangent, accepts up to maxint | | sqrt | 0.0000 | 0.0000 | Round-to-nearest | -| log2 | 0.0116 | 0.0016 | 65-entry mantissa table | -| pow2 | 0.0018 | 0.0004 | 65-entry fraction table | -| ln, log10 | 0.0004 | 0.0000 | Via FR_MULK28 from log2 | -| exp | 0.0003 | 0.0000 | FR_MULK28 + FR_pow2 | +| log2 | 0.0116 | 0.0016 | shift/add only for 
speed | +| pow2 | 0.0018 | 0.0004 | shift/add only for speed | +| ln, log10 | 0.0004 | 0.0000 | shift/add only for speed | +| exp | 0.0003 | 0.0000 | shift/add only for speed | | exp_fast | 0.0009 | 0.0001 | Shift-only scaling | -| pow10 | 0.0005 | 0.0000 | FR_MULK28 + FR_pow2 | +| pow10 | 0.0005 | 0.0000 | shift/add only for speed | | pow10_fast | 0.0022 | 0.0002 | Shift-only scaling | -| hypot (exact) | 0.0000 | 0.0000 | 64-bit intermediate | +| hypot (exact) | 0.0000 | 0.0000 | Uses 64-bit intermediate | | hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply | +*Relative error; reference clamped to 1% of full-scale output. + ### What's in the box @@ -57,8 +60,8 @@ number of fractional bits available. | --- | --- | | Arithmetic | FR_ADD, FR_SUB, FR_DIV, FR_DIV32, FR_MOD, FR_FixMuls, FR_FixMulSat, FR_CHRDX | | Utility | FR_MIN, FR_MAX, FR_CLAMP, FR_ABS, FR_SGN | -| Trig (radian/BAM) | fr_sin, fr_cos, fr_tan, fr_sin_bam, fr_cos_bam, fr_tan_bam | | Trig (degree) | fr_sin_deg, fr_cos_deg, fr_tan_deg, FR_SinI, FR_CosI, FR_TanI | +| Trig (radian/BAM) | fr_sin, fr_cos, fr_tan, fr_sin_bam, fr_cos_bam, fr_tan_bam | | Inverse trig | FR_atan, FR_atan2, FR_asin, FR_acos | | Log / exp | FR_log2, FR_ln, FR_log10, FR_pow2, FR_EXP, FR_POW10, FR_EXP_FAST, FR_POW10_FAST, FR_MULK28 | | Roots | FR_sqrt, FR_hypot, FR_hypot_fast8 | @@ -67,35 +70,35 @@ number of fractional bits available. | 2D transforms | FR_Matrix2D_CPT (mul, add, sub, det, inv, setrotate, XFormPtI, XFormPtI16) | | Formatted output | FR_printNumD, FR_printNumF, FR_printNumH, FR_numstr | -### Library size (FR_math.c only, `-Os`) +### Compiled library size (FR_math.c only, `-Os`) -Compiled object code sizes on select platforms (static test build). Your -sizes may vary depending on optimization and linker settings. Sizes -include all code and internal tables; everything is ROMable. +.text section sizes, all code + internal tables, ROMable. Sorted 8-bit → 64-bit. 
| Target | Lean | Core | Full | | --- | ---:| ---:| ---:| -| Cortex-M4 (STM32) | 3.3 KB | 4.4 KB | 5.5 KB | -| Cortex-M0 (RP2040) | 3.4 KB | 4.5 KB | 5.7 KB | -| RISC-V rv64 | 4.0 KB | 5.5 KB | 6.8 KB | +| AVR ATmega328P (8-bit) | 9.2 KB | 12.8 KB | 15.4 KB | +| 68HC11 (8-bit) | 13.3 KB | 18.4 KB | 22.6 KB | +| MSP430 (16-bit) | 7.8 KB | 10.7 KB | 12.8 KB | +| Xtensa LX7 (ESP32-S3) | 2.9 KB | 4.2 KB | 5.3 KB | +| Cortex-M4 (32-bit) | 3.3 KB | 4.4 KB | 5.5 KB | +| Cortex-M0 (32-bit) | 3.4 KB | 4.5 KB | 5.7 KB | | RISC-V rv32 | 4.1 KB | 5.5 KB | 6.8 KB | | Xtensa LX106 (ESP8266) | 4.2 KB | 5.8 KB | 7.3 KB | -| 68k | 4.4 KB | 6.2 KB | 7.8 KB | +| m68k (32-bit) | 4.4 KB | 6.2 KB | 7.8 KB | +| MIPS32 | 4.7 KB | 6.6 KB | 8.7 KB | +| x86-32 | 5.3 KB | 7.2 KB | 9.2 KB | +| RISC-V rv64 | 4.0 KB | 5.5 KB | 6.8 KB | | x86-64 (GCC) | 4.6 KB | 6.1 KB | 8.0 KB | | AArch64 (ARM64) | 4.8 KB | 6.6 KB | 8.7 KB | -| x86-32 | 5.3 KB | 7.2 KB | 9.2 KB | -| MSP430 (16-bit) | 7.8 KB | 10.7 KB | 12.8 KB | -| AVR (ATmega328P) | 9.2 KB | 12.8 KB | 15.4 KB | -| 68HC11 | 13.3 KB | 18.4 KB | 22.6 KB | -Lean = `-DFR_LEAN -DFR_NO_PRINT` (radian trig, inv trig, log/exp, sqrt). -Core = `-DFR_CORE_ONLY` (+ degree trig, BAM tan, log10, hypot). -Full = all features (+ print, waves, ADSR). -The optional 2D module adds ~1 KB. -\* MSP430, 68HC11, and AVR are 8/16-bit — every 32-bit operation expands to multiple instructions. -See [Building & Testing](docs/building.md) for the full cross-compile setup. +**Lean** (`-DFR_LEAN -DFR_NO_PRINT`): radian trig, inv trig, log/exp, sqrt. +**Core** (`-DFR_CORE_ONLY`): Lean + degree/BAM trig, log10, hypot. +**Full** (default): Core + formatted print, wave generators, ADSR envelope. +Optional C++ 2D module adds ~1 KB. +8/16-bit targets expand each 32-bit operation into multiple instructions — hence the larger sizes. +See [Building & Testing](docs/building.md) for the full cross-compile setup.
### Lean build options @@ -222,8 +225,8 @@ The full docs ship in two forms — pick whichever fits how you read. FR_Math has been in service since 2000, originally built for graphics transforms on 16 MHz 68k Palm Pilots. It shipped inside Trumpetsoft's *Inkstorm* on PalmOS, then moved forward through ARM, x86, MIPS, -RISC-V, and various 8/16-bit embedded targets.  -The current release now has a full test suite, bit-exact numerical specification, and +RISC-V, and various 8/16-bit embedded targets. +The current release now has a full test suite, numerical specification, and CI on every push and better documentation. ## License @@ -240,5 +243,6 @@ PRs and suggestions are welcome.  Please be detailed as embedded systems can in ## Version -2.0.7 — see [release_notes.md](release_notes.md) for the v1 → v2 -migration guide, numerical fixes, and new functionality. \ No newline at end of file +See [release_notes.md](release_notes.md) for the v1 → v2 +migration guide, numerical fixes, and new functionality. + diff --git a/agents.md b/agents.md index 806064d..6bd9c32 100644 --- a/agents.md +++ b/agents.md @@ -22,7 +22,7 @@ tests/ Test suite (7 programs, run via `make test`) examples/ Arduino .ino sketches + POSIX example docs/ Markdown documentation pages/ HTML documentation (mirrors docs/) -scripts/ Build, release, version sync helpers +scripts/ Build, release, version sync, size report helpers tools/ Coefficient generators (Python, C++) dev/ Development notes and planning (not shipped) ``` @@ -32,7 +32,9 @@ dev/ Development notes and planning (not shipped) ```bash make lib # compile library objects make test # run all 7 test suites (27+ tests) -make examples # build POSIX example +make examples # build example programs +make size-report # cross-compile size report (Docker) +make size-update # size report + patch doc files make clean # remove build artifacts ``` @@ -85,9 +87,11 @@ Versioned files (all synced automatically): 1. Bump `FR_MATH_VERSION_HEX` in `src/FR_math.h` 2. 
Run `./scripts/sync_version.sh` -3. Run `./tools/make_release.sh` (full validation gate) -4. Verify `llms.txt` and `agents.md` are current with any API changes -5. Commit, tag, push +3. Run `./scripts/crossbuild_sizes.sh --update` (rebuild size tables) +4. Run `./scripts/accuracy_report.sh --update` (rebuild accuracy tables) +5. Run `./tools/make_release.sh` (full validation gate) +6. Verify `llms.txt` and `agents.md` are current with any API changes +7. Commit, tag, push ## Lean build options @@ -95,13 +99,14 @@ Define before including `FR_math.h` to exclude optional subsystems: | Define | Removes | Savings | |---|---|---| +| `FR_CORE_ONLY` | Print + waves (shorthand for both below) | ~1.9 KB | | `FR_NO_PRINT` | `FR_printNumF/D/H`, `FR_numstr` | ~1.3 KB | | `FR_NO_WAVES` | `fr_wave_*`, `fr_adsr_*`, `FR_HZ2BAM_INC` | ~0.6 KB | ## Platform targets The library compiles on: AVR (Arduino), ARM Cortex-M0/M4, ESP32, -RISC-V, x86/x64, MSP430, 68k, 8051. Code is 4–8 KB at `-Os` on +RISC-V, x86/x64, MSP430, m68k, PowerPC, MIPS32, 68HC11. Code is 3–9 KB at `-Os` on 32-bit targets. ## Library publishing diff --git a/docker/Dockerfile b/docker/Dockerfile index 4392617..2eee910 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,36 +1,41 @@ -ARG TARGETPLATFORM=linux/amd64 -FROM debian:bookworm-slim +FROM --platform=linux/amd64 ubuntu:22.04 + +ENV DEBIAN_FRONTEND=noninteractive # Cross-compilers for FR_Math size report. -# Bare toolchains only — no IDE, no runtime, no extras. -# Build with: docker build --platform linux/amd64 -t fr-math-sizes docker/ +# Mirrors the proven xelp crossbuild Dockerfile (github.com/deftio/xelp). +# Build with: docker build -t xelp-crossbuild:latest docker/ +# Use libc6-dev-i386 instead of gcc-multilib to avoid conflicts +# with cross-compiler packages on 22.04. 
RUN apt-get update && apt-get install -y --no-install-recommends \ - gcc g++ make bash binutils \ - gcc-arm-none-eabi libnewlib-arm-none-eabi \ - gcc-riscv64-unknown-elf \ - gcc-12-m68k-linux-gnu \ + build-essential \ + gcc clang tcc \ + libc6-dev-i386 \ + lib32gcc-s1 \ + gcc-aarch64-linux-gnu \ + gcc-arm-none-eabi \ + gcc-msp430 \ + gcc-avr \ gcc-m68hc1x \ + gcc-m68k-linux-gnu \ + gcc-powerpc-linux-gnu \ + gcc-riscv64-linux-gnu \ + gcc-riscv64-unknown-elf \ + gcc-xtensa-lx106 \ + picolibc-xtensa-lx106-elf \ + gcc-mipsel-linux-gnu \ sdcc \ + binutils \ wget ca-certificates xz-utils bzip2 \ && rm -rf /var/lib/apt/lists/* -# MSP430: TI's prebuilt msp430-elf-gcc (not in Debian repos). -RUN wget -q https://dr-download.ti.com/software-development/ide-configuration-compiler-or-debugger/MD-LlCjWuAbzH/9.3.1.2/msp430-gcc-9.3.1.11_linux64.tar.bz2 \ - -O /tmp/msp430-gcc.tar.bz2 \ - && mkdir -p /opt/msp430-gcc \ - && tar -xjf /tmp/msp430-gcc.tar.bz2 -C /opt/msp430-gcc --strip-components=1 \ - && rm /tmp/msp430-gcc.tar.bz2 - -ENV PATH="/opt/msp430-gcc/bin:${PATH}" - -# ESP32 (Xtensa): Espressif's prebuilt toolchain. -RUN wget -q https://github.com/espressif/crosstool-NG/releases/download/esp-13.2.0_20240530/xtensa-esp-elf-13.2.0_20240530-x86_64-linux-gnu.tar.xz \ - -O /tmp/xtensa-gcc.tar.xz \ - && mkdir -p /opt/xtensa-gcc \ - && tar -xJf /tmp/xtensa-gcc.tar.xz -C /opt/xtensa-gcc --strip-components=1 \ - && rm /tmp/xtensa-gcc.tar.xz - -ENV PATH="/opt/xtensa-gcc/bin:${PATH}" +# Espressif unified Xtensa toolchain (ESP32 LX6, ESP32-S2/S3 LX7). +# Not in Ubuntu repos; prebuilt from Espressif's crosstool-NG releases. 
+RUN wget -q https://github.com/espressif/crosstool-NG/releases/download/esp-15.2.0_20251204/xtensa-esp-elf-15.2.0_20251204-x86_64-linux-gnu.tar.xz \ + -O /tmp/xtensa-esp.tar.xz \ + && tar -xJf /tmp/xtensa-esp.tar.xz -C /opt \ + && for f in /opt/xtensa-esp-elf/bin/xtensa-esp*; do ln -sf "$f" /usr/local/bin/; done \ + && rm /tmp/xtensa-esp.tar.xz WORKDIR /src diff --git a/docs/README.md b/docs/README.md index c7a900a..5dc8485 100644 --- a/docs/README.md +++ b/docs/README.md @@ -48,24 +48,24 @@ radix — Q16.16 is just the reference point for the table. See the | Function | Max err (%)*| Avg err (%) | Note | |---|---:|---:|---| -| sin/cos (BAM) | 0.1526 | 0.0030 | fr_sin_bam/fr_cos_bam direct; 129-entry table | -| sin/cos (deg) | 0.1526 | 0.0029 | FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM | -| sin/cos (rad) | 0.1828 | 0.0033 | fr_sin/fr_cos via fr_rad_to_bam ±2π r16 | -| tan (BAM) | 0.5823 | 0.0008 | fr_tan_bam 65536-pt full; ±maxint at poles | -| tan (deg) | 0.5311 | 0.0008 | fr_tan_deg ±360° s15.16 full; sat at poles | -| tan (rad) | 0.0386 | 0.0001 | fr_tan ±2π r16; r24 pole bypass | -| asin / acos | 0.7771 | 0.0280 | 65536-pt; sqrt approx near boundary | -| atan2 | 0.2564 | 0.0237 | 65536x5 radii; asin/acos+hypot_fast8 | -| atan | 0.2425 | 0.0155 | 20001-pt full sweep [-10,10]; via FR_atan2 | +| sin/cos (BAM) | 0.1526 | 0.0030 | very fast binary angle trig | +| sin/cos (deg) | 0.1526 | 0.0029 | degree input trig fns | +| sin/cos (rad) | 0.1828 | 0.0033 | radian (traditional) trig | +| tan (BAM) | 0.5823 | 0.0008 | binary angle tangent; ±maxint at poles | +| tan (deg) | 0.5311 | 0.0008 | degree input tangent; saturated at poles | +| tan (rad) | 0.0386 | 0.0001 | radian (traditional) tangent | +| asin / acos | 0.7771 | 0.0280 | reverse trig, radian output | +| atan2 | 0.2564 | 0.0237 | reverse tangent, always safe | +| atan | 0.2425 | 0.0155 | reverse tangent, accepts up to maxint | | sqrt | 0.0000 | 0.0000 | Round-to-nearest | -| log2 | 0.0116 | 0.0016 | 65-entry 
mantissa table | -| pow2 | 0.0018 | 0.0004 | 65-entry fraction table | -| ln, log10 | 0.0004 | 0.0000 | Via FR_MULK28 from log2 | -| exp | 0.0003 | 0.0000 | FR_MULK28 + FR_pow2 | +| log2 | 0.0116 | 0.0016 | shift/add only for speed | +| pow2 | 0.0018 | 0.0004 | shift/add only for speed | +| ln, log10 | 0.0004 | 0.0000 | shift/add only for speed | +| exp | 0.0003 | 0.0000 | shift/add only for speed | | exp_fast | 0.0009 | 0.0001 | Shift-only scaling | -| pow10 | 0.0005 | 0.0000 | FR_MULK28 + FR_pow2 | +| pow10 | 0.0005 | 0.0000 | shift/add only for speed | | pow10_fast | 0.0022 | 0.0002 | Shift-only scaling | -| hypot (exact) | 0.0000 | 0.0000 | 64-bit intermediate | +| hypot (exact) | 0.0000 | 0.0000 | Uses 64-bit intermediate | | hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply | *Relative error; reference clamped to 1% of full-scale output. @@ -235,9 +235,9 @@ the build script. FR_Math has been in service since **2000**, originally built for graphics transforms on 16 MHz 68k Palm Pilots (it shipped inside Trumpetsoft's *Inkstorm*), then ported forward to ARM, x86, MIPS, -RISC-V, and various 8/16-bit embedded targets. v2.0.7 is the current -release with a full test suite, bit-exact numerical -specification, and CI on every push. +RISC-V, and various 8/16-bit embedded targets. The current release +has a full test suite, bit-exact numerical specification, and CI on +every push. ## License diff --git a/docs/api-reference.md b/docs/api-reference.md index 79a1cd9..cf1cec5 100644 --- a/docs/api-reference.md +++ b/docs/api-reference.md @@ -79,6 +79,8 @@ and in return get float-like ergonomics with integer-only codegen. | `FR_OVERFLOW_POS` | `0x7FFFFFFF` (`INT32_MAX`) | Saturating ops when the true result exceeds `+2^31`. | | `FR_OVERFLOW_NEG` | `0x80000000` (`INT32_MIN`) | Saturating ops when the true result is below `−2^31`. | | `FR_DOMAIN_ERROR` | `0x80000000` (`INT32_MIN`) | Functions with an invalid input, e.g. 
`FR_sqrt(-1)`, `FR_log2(0)`, `FR_asin(2.0)`. **Shares the bit pattern of `FR_OVERFLOW_NEG`**, so don't mix a `≤ FR_OVERFLOW_NEG` check with a domain check — test for the exact sentinel. | +| `FR_TRIG_MAXVAL` | `0x7FFFFFFF` (`INT32_MAX`) | Tangent saturation ceiling. Returned by `fr_tan_bam`, `fr_tan`, `fr_tan_deg`, and `FR_TanI` when the angle is near a pole (90° + k·180°). | +| `FR_TRIG_MINVAL` | `-FR_TRIG_MAXVAL` | Tangent saturation floor. Negative-side pole saturation. | ### Common numerical constants (`FR_math.h`) @@ -396,6 +398,7 @@ represents exactly 1.0 in the s15.16 output format. | --- | --- | --- | | `fr_cos_bam` | `s32 fr_cos_bam(u16 bam)` | s15.16, range [−65536, +65536]. Exact at cardinal angles. | | `fr_sin_bam` | `s32 fr_sin_bam(u16 bam)` | s15.16. Defined as `fr_cos_bam(bam − FR_BAM_QUADRANT)`. | +| `fr_tan_bam` | `s32 fr_tan_bam(u16 bam)` | s15.16. Uses a 65-entry octant table for [0, 45°] and the reciprocal identity `tan(x) = 1/tan(90°−x)` for (45°, 90°). Saturates to `±FR_TRIG_MAXVAL` at the poles (90°, 270°). Returns exact 0 at 0° and 180°. No 64-bit intermediates; one 32-bit division only in the >45° path. | ### Radian-native diff --git a/docs/building.md b/docs/building.md index fea0142..76eb31c 100644 --- a/docs/building.md +++ b/docs/building.md @@ -155,7 +155,9 @@ you do *not* need `libm`. | Motorola 68k | `m68k-linux-gnu-gcc` | Docker. | | Motorola 68HC11 | `m68hc11-gcc` | Docker. | | PowerPC | `powerpc-linux-gnu-gcc` | Docker. | +| MIPS32 | `mipsel-linux-gnu-gcc` | Docker. | | Xtensa LX106 (ESP8266) | `xtensa-lx106-elf-gcc` | Docker. | +| Xtensa LX7 (ESP32-S3) | `xtensa-esp-elf-gcc` | Docker (Espressif toolchain). | | 8051 | `sdcc` | Manual. | ### Code size (.text section, compiled with `-Os`) @@ -171,6 +173,7 @@ usage will be smaller. 
| Target | Lean | Core | Full | |--------|-----:|-----:|-----:| +| Xtensa LX7 (ESP32-S3) | 2.9 KB | 4.2 KB | 5.3 KB | | Cortex-M4 (STM32) | 3.3 KB | 4.4 KB | 5.5 KB | | Cortex-M0 (RP2040) | 3.4 KB | 4.5 KB | 5.7 KB | | ARM Thumb | 3.4 KB | 4.7 KB | 5.9 KB | @@ -179,6 +182,7 @@ usage will be smaller. | Xtensa LX106 (ESP8266) | 4.2 KB | 5.8 KB | 7.3 KB | | ARM32 | 4.3 KB | 5.8 KB | 7.7 KB | | 68k | 4.4 KB | 6.2 KB | 7.8 KB | +| MIPS32 | 4.7 KB | 6.6 KB | 8.7 KB | | x86-64 (GCC) | 4.6 KB | 6.1 KB | 8.0 KB | | AArch64 (ARM64) | 4.8 KB | 6.6 KB | 8.7 KB | | x86-32 | 5.3 KB | 7.2 KB | 9.2 KB | diff --git a/docs/examples.md b/docs/examples.md index f3e0bed..d54c34e 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -1,7 +1,7 @@ # Examples Short, runnable snippets for the most common FR_Math tasks. Each -example compiles cleanly against the v2.0.8 library with: +example compiles cleanly against the library with: ```bash cc -Isrc example.c src/FR_math.c -o example @@ -200,19 +200,16 @@ int main(void) ## 5. Arctangent and atan2 The inverse-trig functions in FR_Math return angles in -**degrees**, not radians — the output fits in -an `s16` and you can feed it straight back into -`FR_SinI` / `FR_CosI` without any -conversion. This example exercises both `FR_atan` -(single-argument ratio) and `FR_atan2` (full-circle, -two-argument). - -*Caveats:* `FR_atan2` takes only two -arguments (`y`, `x`) and has no radix -parameter — it returns degrees in [−180, 180] as -`s16`. The `radix` argument on -`FR_atan` is the radix of the *input* ratio, -not of the output. +**radians** at a caller-chosen output radix. This +example exercises both `FR_atan` (single-argument +ratio) and `FR_atan2` (full-circle, two-argument). + +*Caveats:* all inverse-trig functions take an +`out_radix` parameter that sets the radix of the +*output*. `FR_atan2(y, x, out_radix)` returns +radians in [−π, π] as `s32` at the chosen +radix. 
`FR_atan(input, radix, out_radix)` has +separate radixes for input and output. ```c #include @@ -222,18 +219,19 @@ int main(void) { const u16 r = 14; - /* atan(1) = 45 degrees */ - s16 a = FR_atan(I2FR(1, r), r); - printf("atan(1) = %d degrees (expect 45)\n", a); + /* atan(1) = pi/4 radians ≈ 0.7854 */ + s32 a = FR_atan(I2FR(1, r), r, r); + printf("atan(1) = %d (radix %d, expect ~%d)\n", + (int)a, r, (int)(12868)); /* pi/4 at r14 */ /* Full-circle atan2 */ - s16 q2 = FR_atan2(I2FR( 1, r), I2FR(-1, r)); /* 135 deg */ - s16 q3 = FR_atan2(I2FR(-1, r), I2FR(-1, r)); /* -135 deg */ - printf("atan2( 1,-1) = %d\n", q2); - printf("atan2(-1,-1) = %d\n", q3); + s32 q2 = FR_atan2(I2FR( 1, r), I2FR(-1, r), r); /* 3*pi/4 */ + s32 q3 = FR_atan2(I2FR(-1, r), I2FR(-1, r), r); /* -3*pi/4 */ + printf("atan2( 1,-1) = %d (expect ~%d)\n", (int)q2, (int)(38603)); + printf("atan2(-1,-1) = %d (expect ~%d)\n", (int)q3, (int)(-38603)); /* asin with out-of-domain input */ - s16 bad = FR_asin(I2FR(2, r), r); + s32 bad = FR_asin(I2FR(2, r), r, r); if (bad == FR_DOMAIN_ERROR) printf("asin(2) rejected, good.\n"); return 0; @@ -565,6 +563,35 @@ significant bits — and seven decimal digits survive. The eighth digit (`5` vs `4`) shows the quantization floor: `2^−24 ≈ 6 × 10^−8`, so the last digit is always uncertain. +## Desktop example programs + +In addition to the inline snippets above, the `examples/` directory +contains four self-contained desktop programs. Each has its own +`Makefile` and `README.md`; build artifacts stay within the example's +directory. + +| Directory | What it does | +|---|---| +| [`examples/fixed-point-basics/`](../examples/fixed-point-basics/) | Educational walkthrough of radix interpretation, `I2FR`/`FR2I` round-trips, `FR_NUM` constant construction, aligned add/sub, multiply precision, division, saturation, and `FR_printNumF` formatted output. 
| +| [`examples/log-exp-curves/`](../examples/log-exp-curves/) | Sweeps `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`, and `FR_sqrt` against IEEE double reference values, printing per-point and summary error tables. | +| [`examples/waveform-synth/`](../examples/waveform-synth/) | Generates square, triangle, sawtooth, PWM, sine, and noise waveforms plus an ADSR envelope and amplitude-modulated combination. Default mode renders ASCII art; `--csv` mode outputs machine-readable CSV. | +| [`examples/trig-accuracy/`](../examples/trig-accuracy/) | Head-to-head comparison of FR_Math (`FR_SinI`/`FR_CosI`/`FR_TanI`) vs libfixmath (`fix16_sin`/`fix16_cos`/`fix16_tan`) vs IEEE double over 0–360 degrees. Requires libfixmath source. | + +Build all from the repo root: + +```bash +make examples # builds all desktop examples +make run-examples # builds and runs 1-3, plus 4 if libfixmath present +``` + +Or build any single example from its directory: + +```bash +cd examples/waveform-synth +make run # ASCII art output +make run-csv # CSV output +``` + ## See also - [API Reference](api-reference.md) — full diff --git a/docs/releases.md b/docs/releases.md index 6655693..a6072df 100644 --- a/docs/releases.md +++ b/docs/releases.md @@ -21,10 +21,10 @@ README restructure, accuracy table cleanup, expanded cross-compile support. 
- **`FR_CORE_ONLY` convenience define** — single `#define` strips both print helpers and wave generators - **Accuracy table cleanup** — removed LSB column (percent error is the user-facing metric) -- **New cross-compile targets** — RP2040 (Cortex-M0+), STM32 (Cortex-M4), 68HC11 added to Docker build -- **Two-column size table** — Core (`-DFR_CORE_ONLY`) vs Full for every target -- **`scripts/update_sizes.sh`** — auto-patches size tables from `build/sizes.csv` -- README reordered: accuracy table first, then function list, then size table +- **New cross-compile targets** — RP2040 (Cortex-M0+), STM32 (Cortex-M4), 68HC11, MIPS32 added to Docker build +- **Three-column size table** — Lean / Core / Full for every target, sorted 8-bit → 64-bit +- **Consolidated `scripts/crossbuild_sizes.sh`** — single script runs Docker, builds, writes CSV + markdown, patches docs (replaces three separate scripts) +- README reordered and cleaned up: accuracy table first, badges as standard markdown, concise build flavor descriptions --- @@ -309,6 +309,5 @@ FR_Math has been in continuous service since **2000**, when it was written to run 2D graphics transforms on 16 MHz 68k Palm Pilots for Trumpetsoft's *Inkstorm*. It has since been ported to ARM, x86, MIPS, RISC-V, and a menagerie of 8- and -16-bit embedded targets. v2.0.7 is the current release with a -full test suite, a bit-exact numerical specification, and CI on -every push. +16-bit embedded targets. The current release has a full test +suite, a bit-exact numerical specification, and CI on every push. diff --git a/examples/README.md b/examples/README.md index c06fc5c..09f2e7f 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,5 +1,8 @@ # FR_Math Examples +This directory contains examples on a few platforms for seeing how FR_math works. For embedded platforms use of the built-in printxxx functions can help provide readable output in various radix formats. 
+ + ## Arduino Examples Each example is a self-contained `.ino` sketch that prints results to @@ -13,17 +16,31 @@ or PlatformIO, then open any example from **File > Examples > FR_Math**. | [wave-generators](wave-generators/wave-generators.ino) | sqr, tri, saw, PWM, noise, ADSR envelope | | [arduino_smoke](arduino_smoke/arduino_smoke.ino) | Compile-only smoke test — exercises every function group | -## Desktop / POSIX Example +## Desktop / POSIX Examples + +Each desktop example is self-contained with its own `Makefile` and `README.md`. +Build artifacts stay within the example's directory. | Example | What it shows | |---|---| -| [posix-example](posix-example/FR_Math_Example1.cpp) | Comprehensive demo of all library features including 2D transforms (requires ``, ``) | +| [posix-example](posix-example/FR_Math_Example1.cpp) | Comprehensive demo of all library features including 2D transforms | +| [fixed-point-basics](fixed-point-basics/) | Educational walkthrough: radix, conversions, add/sub/mul/div, saturation, formatted output | +| [log-exp-curves](log-exp-curves/) | Sweep log2/ln/log10, pow2/exp/pow10, sqrt with error tables vs IEEE double | +| [waveform-synth](waveform-synth/) | Wave generators + ADSR envelope with ASCII art and CSV output modes | +| [trig-accuracy](trig-accuracy/) | FR_Math vs libfixmath trig accuracy comparison (requires libfixmath source) | + +Build all from the repo root: + +```bash +make examples # builds all desktop examples +make run-examples # builds and runs examples 1-3, plus 4 if libfixmath present +``` -Build the POSIX example with: +Or build any single example from its directory: ```bash -make examples # produces build/fr_example -./build/fr_example +cd examples/fixed-point-basics +make run ``` ## Using FR_Math in Arduino diff --git a/examples/arduino_smoke/README.md b/examples/arduino_smoke/README.md new file mode 100644 index 0000000..304683e --- /dev/null +++ b/examples/arduino_smoke/README.md @@ -0,0 +1,36 @@ +# Arduino Smoke Test 
+ +Compile-only smoke test that exercises every major FR_Math function group +to verify that `FR_math.c` links cleanly on AVR (avr-gcc) and ARM targets. +No specific hardware is required; any board the sketch compiles for will do. + +## What it tests + +- Conversions and arithmetic: `I2FR`, `FR_ADD`, `FR_DIV`, `FR_DIV_TRUNC`, `FR_MOD` +- Integer-degree trig: `FR_CosI`, `FR_SinI` +- BAM and radian trig: `fr_cos_bam`, `fr_sin_bam`, `fr_cos`, `fr_sin` +- Inverse trig: `FR_atan2`, `FR_acos` +- Log / exp: `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10` +- Shift-only variants: `FR_EXP_FAST`, `FR_POW10_FAST` +- Roots: `FR_sqrt`, `FR_hypot`, `FR_hypot_fast`, `FR_hypot_fast8` +- Wave generators: `fr_wave_sqr`, `fr_wave_pwm`, `fr_wave_tri`, `fr_wave_saw`, `fr_wave_tri_morph`, `fr_wave_noise` +- ADSR envelope: `fr_adsr_init`, `fr_adsr_trigger`, `fr_adsr_step`, `fr_adsr_release` +- String parsing: `FR_numstr` + +## Building + +**Arduino CLI** (no upload needed — compile-only test): + +```bash +arduino-cli compile --fqbn arduino:avr:uno examples/arduino_smoke +``` + +If it compiles without errors, all function groups link correctly. + +## Expected serial output + +If uploaded and run: + +``` +FR_Math smoke test: all functions linked OK +``` diff --git a/examples/basic-math/README.md b/examples/basic-math/README.md new file mode 100644 index 0000000..5d1cc20 --- /dev/null +++ b/examples/basic-math/README.md @@ -0,0 +1,63 @@ +# Basic Math — Arduino Example + +Demonstrates fixed-point arithmetic fundamentals on Arduino: +conversions, add, subtract, multiply, divide, and utility macros.
+ +## What it demonstrates + +| Operation | Functions / macros | +|-----------|-------------------| +| Integer to fixed-point | `I2FR`, `FR_INT` | +| Addition / subtraction | `FR_ADD`, `FR_SUB` | +| Multiplication | `FR_FixMuls` (round-to-nearest) | +| Division | `FR_DIV` (64-bit, rounded) | +| Constant construction | `FR_NUM` (build 3.14159 from parts) | +| String parsing | `FR_numstr` | +| Utility macros | `FR_ABS`, `FR_MIN`, `FR_MAX`, `FR_CLAMP` | +| Radix change | `FR_CHRDX` | + +## Hardware + +Any Arduino board with a serial port. Output at 9600 baud. + +## Building + +**Arduino IDE**: Open `basic-math.ino` from **File > Examples > FR_Math > basic-math**. + +**Arduino CLI**: + +```bash +arduino-cli compile --fqbn arduino:avr:uno examples/basic-math +arduino-cli upload --fqbn arduino:avr:uno -p /dev/ttyACM0 examples/basic-math +arduino-cli monitor -p /dev/ttyACM0 --config baudrate=9600 +``` + +**PlatformIO**: + +```bash +pio run -e uno +pio run -e uno -t upload +pio device monitor -b 9600 +``` + +## Expected serial output + +``` +=== FR_Math Basic Arithmetic === + +a = 100 +b = 37 +a + b = 137 +a - b = 63 +a * b = 3700 +a / b = 2 +pi ~ 3 +parsed "-12.75" = -12 +abs(-5) = 5 +min(3,7) = 3 +max(3,7) = 7 +clamp(9,0,5) = 5 +42 @ radix 12 -> radix 8: 42 + +Done. +``` diff --git a/examples/fixed-point-basics/Makefile b/examples/fixed-point-basics/Makefile new file mode 100644 index 0000000..e0eaf83 --- /dev/null +++ b/examples/fixed-point-basics/Makefile @@ -0,0 +1,29 @@ +# fixed-point-basics — self-contained build +# All artifacts stay in this directory. 
+# +# Usage: +# make Build the example +# make run Build and run +# make clean Remove build artifacts + +CC ?= gcc +CXX ?= g++ +SRC_DIR = ../../src + +CXXFLAGS = -I$(SRC_DIR) -Wall -Wextra -Wshadow -Os +LDFLAGS = -lm + +TARGET = fixed_point_basics + +.PHONY: all run clean + +all: $(TARGET) + +$(TARGET): fixed_point_basics.cpp $(SRC_DIR)/FR_math.c + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ + +run: $(TARGET) + ./$(TARGET) + +clean: + rm -f $(TARGET) *.o *.gcda *.gcno diff --git a/examples/fixed-point-basics/README.md b/examples/fixed-point-basics/README.md new file mode 100644 index 0000000..eedd81a --- /dev/null +++ b/examples/fixed-point-basics/README.md @@ -0,0 +1,63 @@ +# Fixed-Point Basics + +An educational walkthrough of fixed-point fundamentals using the FR_Math +library. This example is self-contained and builds independently. + +## What it demonstrates + +| Section | Topic | Key functions / macros | +|---------|-------|----------------------| +| A | Same integer interpreted at radixes 0-15 | `FR2D` | +| B | Integer-to-fixed and back round-trip | `I2FR`, `FR2I` | +| C | Constructing fractional constants (pi) | `FR_NUM` | +| D | Add / subtract with radix alignment | `FR_ADD`, `FR_CHRDX` | +| E | Multiply: precision doubling and truncation | `FR_CHRDX` | +| F | Division (64-bit and 32-bit variants) | `FR_DIV`, `FR_DIV32` | +| G | Saturation vs overflow | `FR_FixAddSat`, `FR_FixMulSat` | +| H | Formatted output via putchar callback | `FR_printNumF` | + +## Building + +```bash +make # compiles fixed_point_basics +make run # compiles and runs +make clean # removes build artifacts +``` + +Or compile manually: + +```bash +g++ -I../../src -Wall -Os fixed_point_basics.cpp ../../src/FR_math.c -lm -o fixed_point_basics +``` + +## Expected output + +The program prints a series of labeled sections. Section A shows how +the same raw integer (1234) maps to different floating-point values as +the radix changes. 
Subsequent sections print tables comparing FR_Math +operations against expected values, demonstrating precision, alignment +rules, and saturation behavior. + +``` +FR_Math — Fixed-Point Basics (v...) + +======================================== + A. Same integer at radixes 0-15 +======================================== + + Raw integer value: 1234 + ... + +======================================== + H. FR_printNumF — formatted fixed-point printing +======================================== + ... + +--- end --- +``` + +## Dependencies + +- A C++ compiler (g++ or clang++) +- FR_Math source (`../../src/FR_math.c`, `../../src/FR_math.h`, `../../src/FR_defs.h`) +- Standard C math library (`-lm`, used only for `FR2D` debug macro) diff --git a/examples/fixed-point-basics/fixed_point_basics b/examples/fixed-point-basics/fixed_point_basics new file mode 100755 index 0000000000000000000000000000000000000000..87578daf83b85f39ce37367571c36e77872bcbd5 GIT binary patch literal 35600 zcmeHwd3+RAw*RT_1gg5jl7+20Gvgq#C^7GQs;iTL`ex=c@Avur z)Agac?z#8ebI<*rbI-lEs;KjUyes~k-CmM5z6fT40Y980UJJ*5@P zyXmX@dqtIZp>KeJ>E1_g*_;n~l@(Pz9llmSN)6~2hJGps=wMRKx$5F@)Ksi4ttfJo zRxGPT$+hyrG6M1rs?n``NgfD$ls`(k0`H)|MB6<|7s->ye)P>8)jNr6{0it2b{=N1lu` z&?f_&!=R)Uti*F+tdr6|T=_7fe=LW`P49!-orIIixX(nhF)W21U7%0?+N$c}6=SE5 zgU)&c9crElPT)?7u^x zEtaibdw2Qhcz;WaM^L^R}9{YSii*T{Db z9om@0BPXPz9XQ*s7h29IQOjv<^zJQ?v!xG3Zl6x>*6+I5TaQ!3NdJv!{~^I^|0<~8 z&XN8G@P0LqS`?nsBmMUTbyzdi??W0ooEU@<_?(9PO4}z!`FE97@0aGp4f8P;ozkIZR1lm9O_5&xXw6C2TpLOWmc%l8An0n}h z5!d#4LQ9F|Ui-t)@4_fmA34GAge>XG#S`sa-l+B`spUdY_pmYkLX?SFJ@wDucelNo znmvOwUi;Vis*L?({PV%5^zYH@{hb6Gpp9-D6d3&$mUFt^E;W1Y9ixO6(OhpoDYT#X zDyYX!qdyBgLs0MHt*YDtlYa`=qvI#q`m~>uj^7!m>%G%1&1X3(-xnr-D)X&Dn?alp zCi;_*r|5KdlK%tf)ZMR1$^OwOvrzj9$$PgV(+6E7y01s(&SZZqc!r|wOIl#4_1dK? 
z&Gz;`QcFh=pX3yO6nHLyR_yjsil1%M4|(Fj`xV~qHiwS&_eH*<&z`aVebDFYpd8~k zKXIL#-#Y=F&b91rv|E93cmKTO{FnMd|F48+SKa)Ra|^)3a=EQ@$iEnrlacEGD`=5a zIX2+&yI%T!nd<*GXy05R(qw(`M8oKobJhnBoio8#I`ni|dhpN*PPbnl z#5v7>3j6^~)BPCY@ZDe82hmPD;VR&IN(*1t(r$&PWSajt_|Gp?>!*(QzXe*PXYK7l z+uc3h--^8M@nO#JzY030>`^=66mY)KTjnd%{4cZ2pzmGD@b6`r@^-tW`B_i5&!uMi zpF;lP*D-ebQfK#gJe28w0{M!5%@h1kd`sICzdO;UN1sn@ws#1b(vt8rSmVmZ;*ZHGbXHD-q`zQO|YMtKgXd!zi+Bv1xarLgVc#6O1 z2ighNIjq*X13U^&IXC(PX|KI={Y=E;9K_`rEteeATM(!H2epnGPLN#Yy-fy8 zSutXfIZvz?^Q1O&o~d(Sp6-EwoTEn{wl}dHVmV&JS;R({D%Y6SXprauyD4qEL02nA z>udW*9~*Ly-4^cCyZYfw@#$@XK36nF#ES9eNMp7|Ptz!hvHVBjX)EcJL;< ztMz%0zBXx_t8?*5mmuoPYllWR)#mCWQkw0q9TBm%+L(ODoGf=qn$K|L{DCN2{enn6 z>c$B=R|)!_D69Y-5ukyU4s=}of;1b_dg$-kyWL=dTodvmK%>lU+1p?!-Mh(fWU$w+ ztJTHY2B9y*e0ocQFZX+W$siBr{Q`-6cF4(%qbQFkG#KJK1-Ga{e$In-6EM@!-FDF= z#fr84^9M8L8~W$Rg_9d)c2jsmVVp$nb}i}+G`mla_Zb)`tkb?UWea{?ZQuN)Tgukh z!YR^(t2m6JOu$wrdhJG&7+Z3kPcN>G%a^87bQXMqp70eaI1e;OGUo4hbq(=`QATgv zvsAx>|K;`eJ@zT_W?$j8KBb2ab=m1|TfNWl6#OX({-m#c3U=|F`~v=V41HvKuwPly znha7ge5pPxFG~>J?bL&V5g1{O6Jtcbv962}Q?m2K+F^N;5EjY2CEz`+6?e1#o$C*@ zbZW_;(be^csP?6Eee15rxjzzRoS*j^#v{h$4D$b@zC~kP)oI9Gvf!bAyKixwTK6XC z$?_O$Y?)%D!-27rh<-TKKA*vODFZGR^qF~O_}p;j#W*!#oF;Z^BOQ>%_;din!{B9n z6uHm&3M0X*tL>BT0PaTME@7PMFwTx*oS87rY#3)Yj5Cv<%M}y!$~dFif%&%rFGa>q zZ(-;9Y{Z|}cPakFzN)1H9emXyNQ$o-1(w?{zch?8CD>R8pR-xR{kgZ42K)>@XGJ>b zZ;9}?vg;c4VyzTwh0lpAhUX)eMT)h3qs2(QUW_E9wb71F#3mkpY-7Y}_UlpMXd8a- zfUh46qXLT%mmb%T-1Xmf6*w>!M0zGF4*XVN06mVg9!b#2a*Jz?3wqdaHNp37DA&XH zb5o>P{dGND`-HUPkAwO==cKfm7KmS*LJc|8AK6t_; z9oeePyNKB`^8SDNDvxmy@h3ROgTA{1zKe0>rUEavH{K{QzSR&%#j}X7N68%t~Kq@#p+HTh~jyb$F<~Mke8U9kSFcgY6s7JV)i+`{Fq z0wr#qy4P;|_<%-ocf8>B;im#+u#FaD&gpFj+;Ay40C|7Msn`%>oNX7hBg?Go z(;Kho{}nHuymHEjo9;gcMP2|l1;{xA3h_i;Wj$Np!x6aFosA13%Usf##lFhds!|E*WL3UIF}#&@nfPgKVX zuaVlJgXnFsi}5kQO~Sk$9c{uI)0T;KW}Z*4tdj&yB+vOl+n&}+%DTt_+cbA!&V+4R z;qO>SrZKJ(aJ|U3fxhC&10J77#5^6AAJ@{)?St+jJUuiYFzt#%76Q@$1F5!sbtl+h~c#j4-S(C_Z!TRgWm-pK_ 
z{{01hW#qMw_R_>Q_#(x>w>1WP_|dR4`$5t`_yVtAuy#)t^!iP;LiC&u%h$vS{amir zO$O6@o1ll@l>{3mp|5(_CQHx&L$(pdF4ku(i?BW!fNLNR^B>Z@7E{(iSo39ZUHF>F z>p$nPd@PBNZI?ne$LH;cN#c8m--yq3Q^>sw`W^;OEKi5=Y3>&`m;0S99h^ov zJ1S`ZUeIGkSmKR(LOXiMlfWl~4||*A@xAyY@EO5ZI)(Bm{;(mqZ@#eb4Q1`xOqV7$ z6J=ohCsKUt*8Q3QKI}W+AIqBD;J(Y4&}Oi~A58H_3=ZtCc^|L>>*K{5$_u%+-5%@9}HosVFS z>Ysm@DD%|1xcrMac|R5xX?8ck#&OwZ_Z#q;QyYcoi#Vm)$b7L@$ZzQC$~>(dy!%t_ zklkgtPveci9&Lm9p)So}*PK4UcBA_txnJAXI7)KQ?n?zF(VOa1|4iS; zLZ#jQQ-d+<)7jHP#A24XAJ*P-@&LCtaB0T#e=Zx0qENVyxB%{ z$XnQ3P6zZah5qapz5C7ml(0q?iTxjYHLP(u*Ux}IOov}&<6h7q#SeM^5J#dC`x3*w zg&c!yd%cD%33$u8R+gs0TS(gyWQQ>`%m)ZhYV{x+fg+@b@sZF_0XTv@%w^kQzOQ0z1iH>i|>+5eioYYjFVm7fo z%CHx0G&Sm55Vx{w$lU>dL_CjTOhKppLZJg|Oe@BbmdA=#%C(w;V@k+PG#xP*(Kg=~ zG-C{NU96+sM^Trr!F`cLyX<%d6(hqpnP->Q8-;%F7O{WJ!v5L>Tkk>~l4>YYH@Xd2 zVE-&*UUQS%T91CNMLzAf!!~+b?a+MF?G%Mreca{L`^#_Hz262oWqcJPj#_Iatk)Nv zohw2xynuUU8uPAI4^oCte=l1T1Rh&#J}`cu2_r!RUs z5N{J}cWt|fXIojD1-G?x=}8~3Y2SFWz5rwRY54jjZOm>8^V%0{vQ7={OYR*g&kVaX zu|r4hr6_OL;T}iL2xT6N52JYvXlF(^%^Nv<$K3eG9-nK9#2Ny)I3mgIWZAH*HZVS( z35z^9ZRzlxa~EecSD1%8D+DuD&|^FLi&G=|^BkC2E5_Ccl!+&!F?enk#cN098Qzty z3C-aFo%p%jAK>TwT<-<+>k;&85#nZ>?mf&yFTg%aPa1a&jTOU(r{e)-6!(Mc?ArLk zwgY1jpHTia)~!A~S38Zq{!pB|Qu$KLH_HXYz3)Gx91Ojh`_aCyv|*SZHM=hYcRX|1 z-8S-dg9F%KI*EG>xK4%*rd*)J zZ-8BR&2>RLT(L_t`f~~~p6$YUk6``J*r@rDrFz&2v9Uz||ISVmVW-uY?|IH*KYJN= z*akb?20K{v@52r+t9BUPZHJ(K?Hga-)}`9vqG|{1_ms8V5bZGCLA5VyVY6?JZL)uK zY>($w$ZgjR(MwoYm=fMK4CyQCjI~p+_geCh$b04|Vq?WtiiEwRxt+;X0{t+aBbJOgWsv$&RO0PIq#kwj{b+nv0%90q4c9*vMR}h@Q5rv2KCK-DodVBD zf@?iIE8HiNd9soB^tgYg>>*L#2_F!Ln-;aBa+@-)NEYiA87vtTUE0`yjwxZAA z<8>*o!)HSuQ*A`73;8A?NuPV2dD04`517b31HQw$2j^7guX_}|FM%kO9Px7D!Wp6Cu*)8J6Zd2#< zleph>R*Bzuj&6ZX5L=pXKke+<@xHC4;nt=z;~TdU>5k%F>fc51i|8Bnzh~v2G$?b1 zaYOtzlI3k@d2Ylr0^TpScinIA-iMRmQs~U?eYsn_d%HmtBJJ3BM{{fz*Wmd=wmHv6 z*ar%D&XI~eANC7pASX-ExlGunC7}$@Xma^pugLc?kuQQLjv_rG?w18+Wnu0zLO!pf z@LENIWRA?o-bZ;Z(9yNso`t>pV%SieMs_^YuqS3m=36n(^S#;*w8#5rUatk^vF@Gd 
z+d*Ihzj;yOwHEB9qnA9kaElH2d5oC5u|9cG#QkyJUyApjf7y|C#-8_AL0M)E?TaIt zT(aSD9m@0Ln#siTYogSOH6FPWAx9TMFY^9;I`$)Czt;`oK*$A84%5;hgBU|EGG<<) zeY`F~yOS|rD|ztceLgjhd9V*re4U^F%*MKqbtB9%OgrIwc<%9{Zl~E~VO+W2kaG;> z60Rkx9QKoVwI951EzdAhTzbZM0=8&@S%nRJvsHg)VI${s7{J69CT zU?^T*?2(<-vd3A3T2*ptN?IoT-R@l7UAoFyUQ;|pPAJmpP?C^YgrBsuv6;(c9BJbc zQjjxEP8d6BnU1E9m96Qs?D9BymOb4ndn#)xic;_n-AWm+zO5?uRF&clI7L31<5~fs ztf?%g+$bxxjUbZ~icEnV7GzxJ>B(4DUg@k(DR$SCu2K}50EHBll6)FjImJ2 zG56*<3P8$hc%GW2)$%frb7?hna+b4#Rn^XlYF2M$X*VWNaSCL500^s_EXzv!N{=T1 z0;8vR89F~<8PJ8osbe#*uKFVtnfZII{0Q&WjpU4#vx|yKS!=*tRkNg8?Ss6!6b_&m zQg)V?F0WWoTv3f?6jKMXjX-q@H!aKQP&7$S8=Ib~bR|RSigPvF8Tt6(-l9)BSB{o8iXB&aikFtMOUXr*HA~9zre-pmd4|QF zvk09Wk1i&nqv&GL!_gPpoh>_q@V={x08qOkR zO!ne|wul!T{N^o~X`eNF-op59OLVsZ)l`u^>{S7Mii-m5P(2a)_cXPlrks)G17$zj zKtS!DzQe_xRdQ8Y=Ge3e0lj)PWKn|AO1xQIJ-24XEJf~4 zz{CDI&Z}fjQe|fTLz&lbCRDMufoK)zWFXSZIC_S=F>PG>9Uk_Q6h738k_K@V8OTgY zQF*!as`5ZaKu(W)jt{kksTJdcw(xjxo)>U+%(3Zu2phgtf3 z*w-a}tJ@SYy#G@JrbMn=DK6=! z$&q948+RqOBK?!`b2DF>kT7ZUx;_C34U)D@t_2H`W)!(kZ zXN_)c-BKU@wutopM+Pj3?2LLeX2Rh2hO8PkEcUhG%SH^8 zUyUyx6_@btn9W9OqLlo0%9e49(^AKWXMQ~4)k!<1G)`T6V}-eNx+|w@)?JpZw*9j| zncF{aR{niAzq4>m;XSwg^^T>^&zE|ZMU?Jyl~s&$YpdH=eZ2Nh8(0H@j%Wnh7A{b= zD9{4EKw13-N{tjKF-D-|Ap%Xnn`oBn1S*mR^5KTfzA*xw!KT2NBGAfIfm+84WSA(> z#wp03CXh2*pigrID#M28yIjbgC(w+1fzB@w=$S%+R^vr!%MyV`FB3>tCeXJl1iI)J zs0%k&BG=(P@r?rAa;HH5aj!tXZ4@Z>eu1hU66k|R1hPIR&|A#{&3{6mKeq_f_$z^? z{08-35a{(+1lrmvP~BSsd5)saae?l9PoSTFB+%hc1k(Nq8<{f#{o-!|_5E6)4c`R4 zk?1A7p?*7VtewRIxCk$Swc#cCVOY2?z)R-dB>caHRHE&fco2Xa^=q)%n2im^Kr7Lk zbBN09aOj0Z4Y$It7ZX($5&gD=NM1oy>?ZnI4bhRcL}zL-cIt`5d-2|WBhfdTi9Yia z9sCL8{*>sBpAn7NLG)#Sxd%?;W-Ws#7iE{z~4N50C^vNl<3p1!|BM@8<-FGyNUmw4Ioht zzVevn3(CWA4a0NlFo+3j2Zfuf%E|v9h7YEDAK=5g|AWysODF9!;Uru_11m++m?aXb zp;$BSCg0YdH17@|;UH`@DUyT_usKSNrm)mOu*V=09vDR0{X>YpFpy?yENPpEQ}~t< z6gC_iDg0x-LPrRJ5CS0tLI{Kq2q6$cAcQ~&fe-@!5&{Fg$|{`(det?*_g|_WVh$k? 
zLLh`d2!Rj+Ap}AQgb)ZJ5JDh?KnQ^l0wDxK2!s#_ArL|!gg^*^5CS0tLI{Kq2q6$c zAcQ~&fe->A1VRXe5C|aA1paLVWJs8g^ERCP-LAm50Fc1Z`vY6}Hh%Y^ zq|Nw?lIHF#rJjHf2nhIwmoOUnfzL-S!?mV*X^GRLB&q@x1P8sq!nJg19Da`T&%tr{ z+893NN&TxRnlyp0e3FDOCelzX?Mea*zS*XweHr+<#6SC|3h7WmyR@>3CRCMH&<*$; zFU{0a^JJ91YZDyHoK@A6=nfEB$hpEl%{QW%v{b4|r*)*Mpp})&sZL9GTTpgrDt>lC zBKc(?7QzSWG?lB<>0v?r5GU;xpi>6E|AsGy(#Qg5g_Y8rG^b?k zO8hzdrRC%kDr#0#ReQ)Utfc)ymD^KIM;Igs_s+r3Y5w_=e~#g^b77?wrPXw&##3Bg z?5rxL`!x8I>MLoJ2H!(oj?XTWUjs%1xDSm%Czd!@&`X-4;^lMz`5zbn>LDaP9|DHY z@#QpnW*B~6F{!M7QdrM`W7lx7{tb!Wj{rk+vdVC;mSr_*(ffEXJb=V2v%zrQhM&80 z@pIY+R_AKkt95(md6a+7xsT=wjun-jl_m6_S_-SEEQRV%YFDmaL3^}S^s*M((=jde zbrw~59Q3e;&2@s6aa312xD&tA;Pah9;*{NYu{#?7-xvJPfAEBvs2>{`vr1W2C zt;A8hrg&*hb+MzQ*jWT*z7v)~_Id3JCn`T1^xJmn8NDU!0b%7a;V~iNxUf&NNApVf zCCwMYnr7`lqwEbEp0g|LQEkNUg?)n2__^l1)+gMpnH>=`FlP3moY^t>E*&3(VKRI| zksif&>v(*OQR&kvovG5*_}(Cw=c=@r5TI{o8cNN_hxNGJt{3|MbmrBbjy-B6R z5I30r5tYtS+izCs(Q=^tS1Rp`574iu^oCIZ`iM#oRrNio&;etz{6DMZUJN6qzgFqf zD%}T}jOUjsjn8H(^f8rAQ0Ybe0`>9POr?CbN>5hloajLLOqKpz^^bxet@_VWl{WSd z@Viv{8T1x$} z2UPlQl|H7@i&gqVm3~yE|68R$SLw?t&EJMZlDhB{_y+<b~N0VS8T}cIkH0LQPb*f;xJiuCBxg67k zl2N%lP)CjCoVSu^0wtkN202$zv#z2n3Z;a9#}5QA2V$2J9F_RrO$Dcc?hIu%=%#{` z0%s^44qTOps^&R3pmVxcuw}AWxMfm7DRAwsjlAyK3V0~TwY!|tLFr164#a&Hr;r@} zyNN2y89fB%j&6eIhTeoakvNn%{v#_0hof-z#HwQDGyL;&mlD&Hu*h)mDkDIqWpEcg zX%p001&x3yEfw08vKc01tPSLhUk+R5Tjx3ovZp)pdgT>l2lD0x@+LCh{CNx0Ve;%5 z%-t)0_Pm?Y9dokh76kJPCxbiSEHg9VDexoZ_~zq?gBS_hLGUo)BL;aq8k$Ih!J|<= zyXrzdhgd!^>ou&DHTazBKXr`!Qe_jAuY$u(@y&1;jsK#I=fYd|56g}od#3eBecLvv##M9=*`-*OTkp2fT +#include + +#include "FR_defs.h" +#include "FR_math.h" + +/* putchar callback for FR_printNumF */ +static int put_char(char c) { return putchar(c); } + +/* ------------------------------------------------------------------ */ +static void section(const char *label) +{ + printf("\n========================================\n"); + printf(" %s\n", label); + printf("========================================\n\n"); +} + +/* 
================================================================== */ +int main() +{ + printf("FR_Math — Fixed-Point Basics (v%s)\n", FR_MATH_VERSION); + + /* -------------------------------------------------------------- */ + /* A. Same integer at different radixes */ + /* -------------------------------------------------------------- */ + section("A. Same integer at radixes 0-15"); + + s32 val = 1234; + printf(" Raw integer value: %d\n\n", (int)val); + printf(" radix float-equiv precision (1/2^r)\n"); + printf(" ----- ----------- -------------------\n"); + for (int r = 0; r <= 15; r++) { + printf(" %5d %11.5f 1/%-5d = %.8f\n", + r, FR2D(val, r), 1 << r, FR2D(1, r)); + } + + /* -------------------------------------------------------------- */ + /* B. I2FR / FR2I round-trip conversions */ + /* -------------------------------------------------------------- */ + section("B. I2FR / FR2I round-trip conversions"); + + int test_ints[] = {0, 1, -1, 42, -100, 32767}; + int n_tests = (int)(sizeof(test_ints) / sizeof(test_ints[0])); + int rdx = 12; + printf(" Radix = %d\n\n", rdx); + printf(" int -> I2FR(int,%d) -> FR2I(fr,%d) -> float-equiv\n", rdx, rdx); + printf(" ---- ----------- ----------- -----------\n"); + for (int i = 0; i < n_tests; i++) { + s32 fr = I2FR(test_ints[i], rdx); + s32 back = FR2I(fr, rdx); + printf(" %6d %11d %11d %11.4f\n", + test_ints[i], (int)fr, (int)back, FR2D(fr, rdx)); + } + + /* -------------------------------------------------------------- */ + /* C. FR_NUM — construct pi (3.14159) at radix 12 */ + /* -------------------------------------------------------------- */ + section("C. 
FR_NUM — construct fractional constants"); + + rdx = 12; + s32 pi_fr = FR_NUM(3, 14159, 5, rdx); + s32 neg_half = FR_NUM(0, 5, 1, rdx); + neg_half = -neg_half; /* -0.5 */ + printf(" pi at radix %d: raw = %d, float = %.6f (ref %.6f)\n", + rdx, (int)pi_fr, FR2D(pi_fr, rdx), 3.14159); + printf(" -0.5 at radix %d: raw = %d, float = %.6f\n", + rdx, (int)neg_half, FR2D(neg_half, rdx)); + + rdx = 16; + pi_fr = FR_NUM(3, 14159, 5, rdx); + printf(" pi at radix %d: raw = %d, float = %.6f (ref %.6f)\n", + rdx, (int)pi_fr, FR2D(pi_fr, rdx), 3.14159); + + /* -------------------------------------------------------------- */ + /* D. Add/sub with aligned radix */ + /* -------------------------------------------------------------- */ + section("D. Addition / subtraction with radix alignment"); + + { + int ra = 10, rb = 10; + s32 a = FR_NUM(2, 5, 1, ra); /* 2.5 at radix 10 */ + s32 b = FR_NUM(1, 25, 2, rb); /* 1.25 at radix 10 */ + s32 sum = a + b; /* same radix — direct add */ + printf(" Same radix (%d):\n", ra); + printf(" 2.5 + 1.25 = %.4f (raw: %d + %d = %d)\n", + FR2D(sum, ra), (int)a, (int)b, (int)sum); + + /* Different radixes — must align first */ + int ra2 = 8, rb2 = 12; + s32 a2 = FR_NUM(2, 5, 1, ra2); /* 2.5 at radix 8 */ + s32 b2 = FR_NUM(1, 25, 2, rb2); /* 1.25 at radix 12 */ + + /* Wrong: adding directly without alignment */ + s32 wrong = a2 + b2; + printf("\n Different radixes (a=r%d, b=r%d):\n", ra2, rb2); + printf(" WRONG (no align): raw %d + %d = %d (%.4f ?""?)\n", + (int)a2, (int)b2, (int)wrong, FR2D(wrong, ra2)); + + /* Right: use FR_ADD which aligns b to a's radix */ + s32 a2_copy = a2; + FR_ADD(a2_copy, ra2, b2, rb2); + printf(" RIGHT (FR_ADD): result raw = %d (%.4f)\n", + (int)a2_copy, FR2D(a2_copy, ra2)); + } + + /* -------------------------------------------------------------- */ + /* E. Multiply — precision doubling and truncation */ + /* -------------------------------------------------------------- */ + section("E. 
Multiply — precision doubling, FR_CHRDX"); + + { + int r = 12; + s32 a = FR_NUM(3, 5, 1, r); /* 3.5 at radix 12 */ + s32 b = FR_NUM(2, 25, 2, r); /* 2.25 at radix 12 */ + s32 product = a * b; /* result is at radix 24 (12+12) */ + printf(" 3.5 * 2.25 at radix %d:\n", r); + printf(" a = %d (%.4f), b = %d (%.4f)\n", + (int)a, FR2D(a, r), (int)b, FR2D(b, r)); + printf(" product raw = %d at radix %d (%.6f)\n", + (int)product, 2 * r, FR2D(product, 2 * r)); + printf(" ref = %.6f\n", 3.5 * 2.25); + + /* Truncate back to original radix */ + s32 truncated = FR_CHRDX(product, 2 * r, r); + printf(" FR_CHRDX to radix %d: %d (%.4f)\n", + r, (int)truncated, FR2D(truncated, r)); + } + + /* -------------------------------------------------------------- */ + /* F. Division */ + /* -------------------------------------------------------------- */ + section("F. Division (FR_DIV, FR_DIV32)"); + + { + int r = 16; + s32 a = I2FR(7, r); + s32 b = I2FR(3, r); + s32 q64 = FR_DIV(a, r, b, r); /* 64-bit intermediate, rounded */ + printf(" 7 / 3 at radix %d:\n", r); + printf(" FR_DIV (64-bit, rounded): %d (%.6f)\n", + (int)q64, FR2D(q64, r)); + printf(" ref = %.6f\n", 7.0 / 3.0); + + /* FR_DIV32 works when x << yr fits in s32. + * At radix 8, x=7 → x<<8 = 1792, well within range. */ + int r8 = 8; + a = I2FR(7, r8); + b = I2FR(3, r8); + s32 q32 = FR_DIV32(a, r8, b, r8); + printf("\n 7 / 3 at radix %d (FR_DIV32, 32-bit only):\n", r8); + printf(" FR_DIV32: %d (%.6f) ref = %.6f\n", + (int)q32, FR2D(q32, r8), 7.0 / 3.0); + + a = I2FR(22, r); + b = I2FR(7, r); + q64 = FR_DIV(a, r, b, r); + printf("\n 22 / 7 at radix %d:\n", r); + printf(" FR_DIV: %d (%.6f) ref = %.6f\n", + (int)q64, FR2D(q64, r), 22.0 / 7.0); + } + + /* -------------------------------------------------------------- */ + /* G. Saturation — overflow vs saturate */ + /* -------------------------------------------------------------- */ + section("G. 
Saturation (FR_FixAddSat, FR_FixMulSat)"); + + { + s32 big = 0x70000000; + s32 also_big = 0x20000000; + s32 raw_add = big + also_big; /* overflows! */ + s32 sat_add = FR_FixAddSat(big, also_big); + + printf(" Addition overflow:\n"); + printf(" 0x%08X + 0x%08X\n", (unsigned)big, (unsigned)also_big); + printf(" raw add: 0x%08X (%d) — OVERFLOW!\n", + (unsigned)raw_add, (int)raw_add); + printf(" FR_FixAddSat: 0x%08X (%d) — saturated\n", + (unsigned)sat_add, (int)sat_add); + + s32 x = 50000; + s32 y = 50000; + s32 raw_mul = x * y; /* overflows at 32-bit */ + s32 sat_mul = FR_FixMulSat(x, y); + + printf("\n Multiply overflow (as s15.16):\n"); + printf(" %d * %d\n", (int)x, (int)y); + printf(" raw mul: %d — OVERFLOW!\n", (int)raw_mul); + printf(" FR_FixMulSat: %d (%.4f as s15.16) — saturated\n", + (int)sat_mul, FR2D(sat_mul, 16)); + } + + /* -------------------------------------------------------------- */ + /* H. FR_printNumF formatted output */ + /* -------------------------------------------------------------- */ + section("H. 
FR_printNumF — formatted fixed-point printing"); + + { + int r = 13; + s32 z = (s32)(123.456 * (1 << r)); + s32 zn = -z; + + printf(" z = %d (raw), float = %.4f\n", (int)z, FR2D(z, r)); + printf(" zn = %d (raw), float = %.4f\n\n", (int)zn, FR2D(zn, r)); + + printf(" FR_printNumF(z, r=%d, pad=6, prec=3): ", r); + FR_printNumF(put_char, z, r, 6, 3); + printf("\n"); + + printf(" FR_printNumF(zn, r=%d, pad=6, prec=3): ", r); + FR_printNumF(put_char, zn, r, 6, 3); + printf("\n"); + + printf(" FR_printNumF(z, r=%d, pad=8, prec=5): ", r); + FR_printNumF(put_char, z, r, 8, 5); + printf("\n"); + + printf(" FR_printNumF(zn, r=%d, pad=8, prec=5): ", r); + FR_printNumF(put_char, zn, r, 8, 5); + printf("\n"); + } + + printf("\n--- end ---\n"); + return 0; +} diff --git a/examples/log-exp-curves/Makefile b/examples/log-exp-curves/Makefile new file mode 100644 index 0000000..6279a60 --- /dev/null +++ b/examples/log-exp-curves/Makefile @@ -0,0 +1,29 @@ +# log-exp-curves — self-contained build +# All artifacts stay in this directory. +# +# Usage: +# make Build the example +# make run Build and run +# make clean Remove build artifacts + +CC ?= gcc +CXX ?= g++ +SRC_DIR = ../../src + +CXXFLAGS = -I$(SRC_DIR) -Wall -Wextra -Wshadow -Os +LDFLAGS = -lm + +TARGET = log_exp_curves + +.PHONY: all run clean + +all: $(TARGET) + +$(TARGET): log_exp_curves.cpp $(SRC_DIR)/FR_math.c + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ + +run: $(TARGET) + ./$(TARGET) + +clean: + rm -f $(TARGET) *.o *.gcda *.gcno diff --git a/examples/log-exp-curves/README.md b/examples/log-exp-curves/README.md new file mode 100644 index 0000000..133cd7c --- /dev/null +++ b/examples/log-exp-curves/README.md @@ -0,0 +1,63 @@ +# Log / Exp / Sqrt Curves + +Sweeps the FR_Math logarithmic, exponential, and square-root functions, +printing comparison tables against IEEE 754 double-precision reference +values with per-point and summary error statistics. 
+ +## What it demonstrates + +| Table | Functions tested | Input range | +|-------|-----------------|-------------| +| Log | `FR_log2`, `FR_ln`, `FR_log10` | 0.25 to 10.0 (9 selected points) | +| Exp | `FR_pow2`, `FR_EXP`, `FR_POW10` | -3.0 to 3.0 in 0.5 steps | +| Sqrt | `FR_sqrt` | 0.25 to 100.0 (15 selected points) | + +Each table shows: `input | FR_result | reference | error%` + +A summary line per function gives max |error%| and avg |error%|. + +## Building + +```bash +make # compiles log_exp_curves +make run # compiles and runs +make clean # removes build artifacts +``` + +Or compile manually: + +```bash +g++ -I../../src -Wall -Os log_exp_curves.cpp ../../src/FR_math.c -lm -o log_exp_curves +``` + +## Expected output + +``` +FR_Math — Log / Exp / Sqrt Curves (v..., radix=16) + +======================================== + Log functions (input > 0) +======================================== + + input | FR_log2 ref_log2 err% | ... + ... + FR_log2 max |err|: ... avg |err|: ... + +======================================== + Exp functions (input -3.0 to 3.0) +======================================== + ... + +======================================== + Square root (FR_sqrt) +======================================== + ... + +--- end --- +``` + +## Dependencies + +- A C++ compiler (g++ or clang++) +- FR_Math source (`../../src/FR_math.c`, `../../src/FR_math.h`, `../../src/FR_defs.h`) +- Standard C math library (`-lm`, for double-precision reference values) diff --git a/examples/log-exp-curves/log_exp_curves b/examples/log-exp-curves/log_exp_curves new file mode 100755 index 0000000000000000000000000000000000000000..92619fb9c4e715d3fb5a63cf4db9505feb09f4a8 GIT binary patch literal 35824 zcmeHw3wTu3)%H3wlW=Axf!w%FU}geDXA*9K011ehaIv{@$s}@#mq|zhNhTL&k|?(! 
z6G0ggtWKqZL`$ex6Vz7vQAMgk(Ascmi=|NYvx>VM(6)Ymzwh~< z=YQrrp0oGbd!4=4UhCa!?Y$=+{_xc&--HnfgiC=t3U{lD=s_|fM>G_725y^ee)1>E#=XzD036?|{)}%b&d{|Hf+ReanqyB-0@%_TS85v(=TZsNMJCxq8v<1;kqo4CvRnK zU1`M(C4mjp-rLtY5x{Z(sBYO2m9YOA_OhE-jw32*;uS^xGcwgK&z7DlvRm>1dJ zvpTB1XPwFmA&-gOQ@m=0+uC(2wWH}{6fwp7iAbBJJf(Oy3bc73%71k@)yF?+s<&&Q zsvX!nmKr_PS9USgdvTev9X3B^{!6M?gEZ@zRB!i4MZ46;gQ?!ZD7##p>OH$q@by!c z;=MeQ+W&~Y2BGfcl4)M(@7RzH)4W&iQngU$R^9l8J5 z{|wb|+rq|KJtl3}AAyZytBXf@qmkC`o968rM_0qaGXOW?KD92i>ePmQRb}>7*J3&r z=}#$Rb_p1_0=FX+<2^7#e`+H21eR|_xt7ZVc%MR97|LjP+uT;jq8`>dm-AbY*97F_ zItuk?W?FiT#uELt8UJ=x3qKZLHBha4u;PXE*yOFOJ<4nAgmf9KDfj624egt?#G?mabydKi~sxEXhWINK3&9b=yA zsV&rV_i@Hw3(6T|p6c8B${t5qxQwxH`d!D^73BRG`vQ3hUva?K_Ssb*$@n@AyEDER zQx?=Qz5=num?B{7X&F~LWn3|)7+ZcE%|^Zz7~2GV{Tw(u({e}EdQ++X493XVavPUQ z_+mUgbabiyuY>zlot_eaC&P6-y$e|>qEauK3-v?#kt6-G4_7BqF3ty*fvY_-uKKX@ zaoV?V^;_@<;!4KgXQG-UUdGMq%mXa-;R+ZF#MMhI3$RL~$!qoD>N9bmz*zard=c#c zE0=w~@H1LZ-d!vs>ckT=PrKmlf=}v(?~>X{5xhIV69bV!vz6*No!W``K&B4#a zrxcQ|B-!6lc%974`gl|6t(Wp{YtL#c;zTau#eBq#d59l#+ppEk!I*R4FR~F|vZ_9v zmRW^(<2|6Xo$P&s-o7wTKwMEEu8{XjCGC>hyrT3LqD|0OCHjEEyTIRHtIFH6dItJQ zZ^!#w9wS00q9A08#!j8tLJpBUI<@JLE^QX_NgH(@;mYK0rI@|z^dr^;+a9OcwX4P4 z<2-(Lz((s~YIU08G_h(yqcsTRFzxo}x(z?FZWLOb8gF`TR!>~+Iq)3;pFz;tvpPJw z=h4n+v=iofzGV~IY2KlYH3(|EWp}d6ZXos;Q{tVTf}1g=g$xbp6C7EM z9^G@mQzG!Bbv*~WIFFwP-i~67Y!Aj&MpCm*T?#BUs&dl=(Sf+urw0>s1V0mF!nm={ zj}gbD=ZdbOxoSZb$-HIYJ*5b$(!8UyG4}8(zCjw?P(s5nvyTfdmVET z^KmA5|JB%{F|QgFJ(1Li23Uy9s(%W1eX+&yHZ8 znK93@Fwe3u&&+}*Ta4FA^Nd^(d3QlyQX4zn1wE_N;d>6YNqAzcDk)zBtY!#m39BZ7 z+YQevSJ5OjY>YMSi&>eYz1fS)eK-T=GLiS=Edh9|7~Z56UFz6OU{0(WmIq%JDZ2Va zi;-Hb7)i)0qisF#O+5eD#^km#u1AKTZ{XertRGNOeuiL}Ug0!&U}T+tp~tjCMveW~}PEb-Rw|&1v8Qc8TISRq##k7Bw*2A=%G|p{GT> zYqU5Dk$xABH!A;ka6FRnVm$Civ-PN7)ahY2XC%t0N{Ns=*n|+T#MM)mSQ}H*R z0x_Mm-H7ewi0x|dni1RK_q6bDc_vo|e26b@nZM~BERO1ce^Tb1ec)kh!aF~eWm*E6 z5c?pL;dcthXyC6+i7}T84&iEVzT`JgK4i`MV82r%XtXR}Coq@!!G5f})Y?E92{KCZ zy{&K@U|goE93Npnh@%s0U<^9mb7KrrKsF;{81tO!-7lqq{({$r9|@GqHY!BS3A6(a 
z%kuZ5z0Yw=HpCof+xhLtZ8n4k>MQ!6>WgE%|K0a{pfkv>hTamOw{y^2BJ`GpSe@0g zx z0pDtj!wg)r-rDHOWKH-XUDghDY@(}4QwW>0f3c%aP1qrw1+u6y518LT=89I}CD2yi zIVa@xcnsDJT_+y0ZdZu*^T3Da-XVD#?-in(?$t)eKu_HNYtRSUw`xosIveyF2K(+( zP_hl<@3_{RkMo`w&li=sqC8)Cjf8W>e9_%*72{%{H#OpVbhH_3%&bXRXXbje(mF{{ zL~_jc+xC=FEv<`euuW?(VkT_U0lZ@!nZ$ZkL$8-;xG$t-ATM>T4R*vzJFf%Y50Q2y4Mx`5@SCnc5K|n?{c1YZ`{UM zFyWZhOq8JEa>NK;pG#}m-A&S3_|n4Hu#UOD_qqVtBs{^#nW1~Otv?=SV@;bKcSIM4 zeby#Hk)0KPRJQ=@-HOI0v?3FZm zXP{;wI@_rGvZ8ILO)uxrv{%9eAwH(7#F~&2A>Ih zt#oBdD^W7$e*(pIY}>2w>4&lN?YXST_V2q)@twLX;K3Ys zSZBljn)d-!SRWTEDBC};xQ#2eS8)y?0AtwOuU5AAV6V=YV4M|&QZi#BS)}Cs@R2Vq z_;JB@q+1qjF8EL}*xvcT7R0Fjd54H5om^$eyNsLnV}?kJqZu|fq+1+s05d1o3elHw zt6hxV$L+o zz?vWPgmrZ44UZ1L6~uH0jNy6Ur3>UK!!AAI<&n*8+k2j&qN!%CeK7HNQAu; zT#CE~lxyJo6EJ7lmpqL5n;4#nyw?5+)@O-W1C!F&GzMoih@s5C9ql%OC&8u7ON30BESoG>xuDKACS^!`ggGr@ zP7AK=qhHuh}=FDswm?`n`xFy-*1I9QsBik77Ylqh?S*XiO!j1TZKBWCE>%nOD z@n|562{L4wGa;v18kY(FPCyLkj~E1hDXn?Cl%|AkC1oeXcj(S3HQA%VvkAT$F%e!S))C3!@`pv&d$he_haoIb}lpXcC%q&E`(&oGuIdv}~6q zIxC){uqS$PQ|?%A^P$A%$}tu*+oJ+|(I#_~wjF*et)3j+z$4hASf`+qek#$vHD)H} zk&@?%Qk|V?_RlFHJHdQdXF}h6E@;6V=6A7EOo7jZ?Cfhr+1DCnnf>QDZ(~k( zjazT+TC&5brX5yslu7z6fX{STZa?2|!ROhA_5HTi@8tU~U)>r?d)P4{P4|LPD_D*d+=I{%^`W5^R zMk=?pP?2_$&+&Jlev<0Slx__c)x({s&{ zSVKTBwn%amaoez~(l!fSFlUqZ4GHr(Je*w-Xq5NfxYYWD;2!69ua{_VbW!R_qxM|0b zSTSVS1pLSv#pB?2+Sb0@vi}zNC)8iWy4CYdFV5Oxu6N4)2l#W?#l?9c)`+zmTqX#Wg5{93ldus%EZy}>G z4wq#+V817=n!#E%))nUXKj{Yd6E!B+RP4Q$ zJSOs<`I*>Qv4bLE?`ZBPwIAiC0lQ_eai=0$I|y@_*BU*mXTz4WVAGkf?F`sB9lv?J zCC$H^*Vq<;Z7fJ@pb^;1QmEVNM%y&Nm);4P_#AB@@W3&4An3=HZfn^sCw1y@ib}Xg zh}7a8Z8%L#i-!+`ui^JkMp5pEQIymn;cp;h^8G&IU+dwIEcS?GNj34F9_M$`9un^t z0R!SNOUl>4zTsEoD2JX@k(sMBG45^If+e-Q17ijsuSitt)aSt0fVMI~Yf%@5I}5+R%|)A!xcbMwfcj5_ zG1`VWa%}2&SZDuDvu@5nOUf@$-i!AC3uEIwNkfS5ysOD&c`OUD2eIPOO^(Ul=EcwL z%?dxV4QIDn{0&Bov^T|=i~~1Hdt)`>H&^WU%sn%XExB2QeV~BfImTnphyB7?w38-i>}KrK z5>bcWXtMdNSLAa{l#AdoP^44Dd6~bhG{i0w+UIrD1F)&u5}Aj+kMz4hckgy<8uspm zu%S4etoTjCnvfoumx(yfXSLnvkN3~KUh}udviD$Y2cR3^<`p%swO}s|?Q>>@WMn~q 
zP7`7`)+eusI3MTzrT8GmmmXZRCBcG`k;#~vc zPde){20Bq$SZ3e*eD;8MStcGU`!GK(?Ki{FC-`~X=vV9Sm-ngMZ-(42kD1pkqG)+V zM7i2E0Ovn#nm!xtzHTF|PmV<>ELTZkI4R{r1M-@HeE)!aR6u@EKwcM+*9YWB`0^xQ zk!Qy=KC#6y-A5jS`S>~|r=Q8`6*(n~R869s#>nX~IUOaZ|EQgt-;ekQ?Tomoz40=g zDyNxpI$ur~%PCPWu4}lj;=afPF5tfR67CDQ373Eyn8pz!Uq2E?DPK}$gudI)uKrrH zDQsB(=LSrT?2NiI=Jdeo!B>Z@(}fLtY`D&_-8gE*vm;IMZMWQN+G#cOF)P=Y0ojy3N&hq>8TV}4F^}E@_e^8OJC-dtpOHSjQ19QKg zmz-N}ZOJ>Ff9X#B!kLRo7C%t%)Lnb-?!4#E_nxtxFZ{gdizS~Hcb9xndTi-y%bqRU zR9;n-Ii2u_q z51TR*)T4hlX7jkhr129&CVepZwUixGn{Hcqd$px}hJ9A;9qTf-W$n%RaBlzHJMuQ% z`S!wF3Ld=s@AnoLohf!M4J+Scuc#j9P}X(b_rc1K*RTWv9aad`5+YEYD9{2-qO|@3 zjgJ&4Ax5Clg9Vz5U*R)`3shnh$b-e|o?8Su3x{YLBhd2k0(DFjNH;~GwNp_(U7#XN z>W^j#RDoddO*Yz|C(z71fi5f%=*0qo?!Oo9EfHwcQh_uT0)1U2&}D}}y;xXAuEO8z ztrckT0|NcF`u1bX0vKtFj;phF)Dr2H68{H#Df{ZycS7X@1Ll^=~n`|uZr_h3_T z4wJtGKY(}Q57~yoX)VAX3Az*UccS3bOUZMj(M!QcC-TM@g zeh1N?b`mwV5siAD==ERV?+^cj=-roy7Vjf^`!ym<2T|LbMEyGPx2s2pe(`&v%Wvav zCQlGK-ywSDKjF+iAiDSwQS`?+wCuv)i=HEz{dc0w&x!81Ky>?M{B7q~M7kcx4h#r* zYH&kz!WkAt;0LZ&=-)D90J$IEO7zji5IVeV+8tKgoT9oi<7xM1V@}O7<2d8&70dC= zb2#gaGwYrAmDU=Kru*V+M;o0*CFLuo$CsEjG-2%cu_?s-OY5tP>&o$G%SKaq_44{U zs)b>5UZ;^(iwg|#i)-0Pq&eEm6OsUfuFO^pN%Bp3Sr15?p z*~^A~7@T@|yd7Dv>fMn7wGeH2l&P%ej93yoItpz~I-%*a`o$ zf^*-J;OHEJ=Q-+&oTWx*O--HA1Q}`pA0G^o#>+TJmT(|FQ);EbGm|8^lwq@h*=MBW zTDF_?WM;obV2112KC2{+_nRfzXBJ=LGfQ7jMz$c@;-}GARkXsm25?$4)fkVrVLqd= z=)Ps&Dl^5G7?;lXw!zpx;ft05V zM1}#RLc+2Iub>8k8VG72sDYpcf*J^FAgF<$27($0Y9OeA|1UKV;pROnc8G4@9yP-4 zI}46*`!<6SZr;msJ+_=uihZk;ic(5$UlM!@uk*>gx!Qr!UeV3HC;A;Suz9Q*80Ur<$ zVsPtmC*V%TJp=b#-1p*M4t|@>hVLBWvxb~Vj|s#dhm;;%EPXN3wjBTNece*B;ggib zWkpUYQ|s?eu+hsboRVHm!ecW()+gcXdU-{4)W4RZNs(Ss!i0KGeI*T1()rO~d7t^N zOv2;U$#`6wE=<4>l*Ki*G`Y6Cnx@qiRnu%GotcW-tvP~iX;E!m3f&7L9VG|&aR%QS zRY1)83ABn7)wCRvHz=ty6Lp^3@#tc%qqFchkO2v2QHyW~{Ym z`YPHbK&DCf{^%X$D`-T1QFSII715lsmCI}DsJN0mLUnyrZJm?6(A-`@`pV*ARx%oY zb1oh`=i#x3ACJz%xBJSg%j@U?g|oD>w5Ya}HYo5hKSie12!=Cb zWQK>7+*Y#^qaOi=N0D)}kb86R=;X)F9IzJMPrH>4C%uIFwmB$$ABwK3aV{^T-zrH} 
zT~iLxpH(iuzlxq$*3z$)ke-ezsb5h^tvG#_Me`89w1lh4@Y*e`^EZ?Uv{ieB7PsB{F?9-bdFl&EI_GIfQzT zOh>^MOg|~pn`L^BOm7|St3N8!Pr@|J50VBT^C~7T(<~R$n`Ii)Rif9+G`f}ON_=;h z`6tTs`*J=0T_36biq2O*Tc)q^yhk3>U8=tg-|S_2kxUP{#YZoZX?%ZDs&~rtR-=!` zr+KCK56JX7nLdLLRC0TpWV&;dkKQ8FqFmo9(_Kbi{m*4OXM~S_Ri-WS`-f%veR({` zCE8~_?(c7M{VaTgm+6Z#y;G(`QONq-Bh&ayvqV2C)A2GrtiSJle5P5dkC5q9nZ|eE zrS@mbbeoKid_OJYr&y-X%kSG|x-Q)J{{1rT#&$oE)l{uJo}YjB+&P0{#Ru zXXI$YS?SZmQbrC&vm_2F>LN7VaRZehQBwG^eTVb3!FNi|S&Bb>ls^ph6-d#jkMaj4 zE|3Q7OC^skm)SU=a6Z7M}|@=%Youb%V%)}!rFmOgP#u4{%>5A6H?dYv#T}M*WtFnLI z+t~T~zDKl^40o@H@;0X~opokeB9MIp})xtLcv_FaPcIUFI)_%xTe{%g0PsD&8i3Ad#yQRQq4TiTMsf0p;& ViDQK?jSFA()H`z~UXaxGe*l}{it7LX literal 0 HcmV?d00001 diff --git a/examples/log-exp-curves/log_exp_curves.cpp b/examples/log-exp-curves/log_exp_curves.cpp new file mode 100644 index 0000000..1689b8e --- /dev/null +++ b/examples/log-exp-curves/log_exp_curves.cpp @@ -0,0 +1,192 @@ +/** + * log_exp_curves.cpp — Sweep log/exp/sqrt functions with comparison tables + * + * For each function group, prints a table of FR_math result vs IEEE double + * reference with error%, then a summary line (max_err%, avg_err%). + * + * Build: make ex_logexp + * Run: ./build/ex_logexp + * + * Copyright (C) 2001-2026 M. A. 
Chatterjee — zlib license (see FR_math.h) + */ + +#include +#include + +#include "FR_defs.h" +#include "FR_math.h" + +#define R 16 /* working radix for all tests */ + +static double pct_err(double measured, double ref) +{ + if (fabs(ref) < 1e-12) + return fabs(measured) * 100.0; + return ((measured - ref) / ref) * 100.0; +} + +/* Accumulator for max/avg error tracking */ +typedef struct { + double max_abs_pct; + double sum_abs_pct; + int n; +} err_stats_t; + +static void err_reset(err_stats_t *e) { e->max_abs_pct = 0; e->sum_abs_pct = 0; e->n = 0; } + +static void err_add(err_stats_t *e, double pct) +{ + double a = fabs(pct); + if (a > e->max_abs_pct) e->max_abs_pct = a; + e->sum_abs_pct += a; + e->n++; +} + +static void err_print(err_stats_t *e, const char *label) +{ + printf(" %s max |err|: %.4f%% avg |err|: %.4f%% (%d points)\n", + label, e->max_abs_pct, + e->n > 0 ? e->sum_abs_pct / e->n : 0.0, e->n); +} + +/* ------------------------------------------------------------------ */ +static void section(const char *label) +{ + printf("\n========================================\n"); + printf(" %s\n", label); + printf("========================================\n\n"); +} + +/* ================================================================== */ +int main() +{ + printf("FR_Math — Log / Exp / Sqrt Curves (v%s, radix=%d)\n", FR_MATH_VERSION, R); + + /* -------------------------------------------------------------- */ + /* Log functions: FR_log2, FR_ln, FR_log10 */ + /* -------------------------------------------------------------- */ + section("Log functions (input > 0)"); + + double log_inputs[] = {0.25, 0.5, 1.0, 1.5, 2.0, 3.0, 5.0, 7.0, 10.0}; + int n_log = (int)(sizeof(log_inputs) / sizeof(log_inputs[0])); + + err_stats_t e_log2, e_ln, e_log10; + err_reset(&e_log2); err_reset(&e_ln); err_reset(&e_log10); + + printf(" %-8s | %-12s %-12s %-8s | %-12s %-12s %-8s | %-12s %-12s %-8s\n", + "input", "FR_log2", "ref_log2", "err%", + "FR_ln", "ref_ln", "err%", + "FR_log10", 
"ref_log10", "err%"); + printf(" %-8s-+-%-12s-%-12s-%-8s-+-%-12s-%-12s-%-8s-+-%-12s-%-12s-%-8s\n", + "--------", "------------", "------------", "--------", + "------------", "------------", "--------", + "------------", "------------", "--------"); + + for (int i = 0; i < n_log; i++) { + double x = log_inputs[i]; + s32 xfr = (s32)(x * (1 << R)); + + double fr_l2 = FR2D(FR_log2(xfr, R, R), R); + double fr_ln = FR2D(FR_ln(xfr, R, R), R); + double fr_l10 = FR2D(FR_log10(xfr, R, R), R); + + double ref_l2 = log2(x); + double ref_ln = log(x); + double ref_l10 = log10(x); + + double e2 = pct_err(fr_l2, ref_l2); + double en = pct_err(fr_ln, ref_ln); + double e10 = pct_err(fr_l10, ref_l10); + + err_add(&e_log2, e2); + err_add(&e_ln, en); + err_add(&e_log10, e10); + + printf(" %-8.4f | %12.6f %12.6f %7.3f%% | %12.6f %12.6f %7.3f%% | %12.6f %12.6f %7.3f%%\n", + x, fr_l2, ref_l2, e2, fr_ln, ref_ln, en, fr_l10, ref_l10, e10); + } + + printf("\n"); + err_print(&e_log2, "FR_log2 "); + err_print(&e_ln, "FR_ln "); + err_print(&e_log10, "FR_log10"); + + /* -------------------------------------------------------------- */ + /* Exp functions: FR_pow2, FR_EXP, FR_POW10 */ + /* -------------------------------------------------------------- */ + section("Exp functions (input -3.0 to 3.0)"); + + err_stats_t e_pow2, e_exp, e_pow10; + err_reset(&e_pow2); err_reset(&e_exp); err_reset(&e_pow10); + + printf(" %-8s | %-12s %-12s %-8s | %-12s %-12s %-8s | %-12s %-12s %-8s\n", + "input", "FR_pow2", "ref_pow2", "err%", + "FR_EXP", "ref_exp", "err%", + "FR_POW10", "ref_pow10", "err%"); + printf(" %-8s-+-%-12s-%-12s-%-8s-+-%-12s-%-12s-%-8s-+-%-12s-%-12s-%-8s\n", + "--------", "------------", "------------", "--------", + "------------", "------------", "--------", + "------------", "------------", "--------"); + + for (double x = -3.0; x <= 3.001; x += 0.5) { + s32 xfr = (s32)(x * (1 << R)); + + double fr_p2 = FR2D(FR_pow2(xfr, R), R); + double fr_ex = FR2D(FR_EXP(xfr, R), R); + double fr_p10 = 
FR2D(FR_POW10(xfr, R), R); + + double ref_p2 = pow(2.0, x); + double ref_ex = exp(x); + double ref_p10 = pow(10.0, x); + + double ep2 = pct_err(fr_p2, ref_p2); + double eex = pct_err(fr_ex, ref_ex); + double ep10 = pct_err(fr_p10, ref_p10); + + err_add(&e_pow2, ep2); + err_add(&e_exp, eex); + err_add(&e_pow10, ep10); + + printf(" %-8.2f | %12.6f %12.6f %7.3f%% | %12.6f %12.6f %7.3f%% | %12.6f %12.6f %7.3f%%\n", + x, fr_p2, ref_p2, ep2, fr_ex, ref_ex, eex, fr_p10, ref_p10, ep10); + } + + printf("\n"); + err_print(&e_pow2, "FR_pow2 "); + err_print(&e_exp, "FR_EXP "); + err_print(&e_pow10, "FR_POW10"); + + /* -------------------------------------------------------------- */ + /* Square root: FR_sqrt */ + /* -------------------------------------------------------------- */ + section("Square root (FR_sqrt)"); + + double sqrt_inputs[] = {0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 7.0, + 9.0, 10.0, 16.0, 25.0, 50.0, 64.0, 100.0}; + int n_sqrt = (int)(sizeof(sqrt_inputs) / sizeof(sqrt_inputs[0])); + + err_stats_t e_sqrt; + err_reset(&e_sqrt); + + printf(" %-10s | %-14s %-14s %-8s\n", + "input", "FR_sqrt", "ref_sqrt", "err%"); + printf(" %-10s-+-%-14s-%-14s-%-8s\n", + "----------", "--------------", "--------------", "--------"); + + for (int i = 0; i < n_sqrt; i++) { + double x = sqrt_inputs[i]; + s32 xfr = (s32)(x * (1 << R)); + double fr_sq = FR2D(FR_sqrt(xfr, R), R); + double ref_sq = sqrt(x); + double esq = pct_err(fr_sq, ref_sq); + err_add(&e_sqrt, esq); + printf(" %-10.4f | %14.6f %14.6f %7.3f%%\n", + x, fr_sq, ref_sq, esq); + } + + printf("\n"); + err_print(&e_sqrt, "FR_sqrt "); + + printf("\n--- end ---\n"); + return 0; +} diff --git a/examples/posix-example/Makefile b/examples/posix-example/Makefile new file mode 100644 index 0000000..ebb3213 --- /dev/null +++ b/examples/posix-example/Makefile @@ -0,0 +1,29 @@ +# posix-example — self-contained build +# All artifacts stay in this directory. 
+# +# Usage: +# make Build the example +# make run Build and run +# make clean Remove build artifacts + +CC ?= gcc +CXX ?= g++ +SRC_DIR = ../../src + +CXXFLAGS = -I$(SRC_DIR) -Wall -Wextra -Wshadow -Os +LDFLAGS = -lm + +TARGET = FR_Math_Example1 + +.PHONY: all run clean + +all: $(TARGET) + +$(TARGET): FR_Math_Example1.cpp $(SRC_DIR)/FR_math.c $(SRC_DIR)/FR_math_2D.cpp + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ + +run: $(TARGET) + ./$(TARGET) + +clean: + rm -f $(TARGET) *.o *.gcda *.gcno diff --git a/examples/posix-example/README.md b/examples/posix-example/README.md new file mode 100644 index 0000000..0c3bb4e --- /dev/null +++ b/examples/posix-example/README.md @@ -0,0 +1,46 @@ +# POSIX Example — Comprehensive FR_Math Demo + +A full-featured desktop demo that exercises most of the FR_Math library +including fixed-point arithmetic, trig functions, error statistics, +2D matrix transforms, and formatted printing. + +## What it demonstrates + +| Topic | Functions / macros used | +|-------|----------------------| +| Overflow in 8-bit arithmetic | Raw C multiply showing wrap-around | +| Radix interpretation | `FR2D`, printing same integer at radixes 0-14 | +| Addition (with saturation) | `FR_FixAddSat` | +| Multiplication (with saturation) | `FR_FixMulSat` | +| 2D matrix transforms | `FR_Matrix2D_CPT`: translate, rotate, inverse, `XFormPtI`, `XFormPtI16` | +| Radix precision effects | Comparing radix 6 vs 11 for round-trip accuracy | +| Forward trig (optional) | `FR_CosI`, `FR_SinI`, `FR_TanI` sweep with error stats | +| Radian trig (optional) | `FR_cos` radian-native path | +| Inverse trig (optional) | `FR_acos` | +| Power / log (optional) | `FR_pow2`, `FR_EXP`, `FR_POW10` sweep | +| Formatted printing | `FR_printNumF` with `putchar` callback, `FR_CEIL`, `FR_FLOOR` | + +Several test sections are gated by flags (`gTestForwardTrig`, etc.) near +the top of the file. Edit them to enable additional sweeps. 
+ +## Building + +```bash +make # compiles FR_Math_Example1 +make run # compiles and runs +make clean # removes build artifacts +``` + +Or compile manually: + +```bash +g++ -I../../src -Wall -Os \ + FR_Math_Example1.cpp ../../src/FR_math.c ../../src/FR_math_2D.cpp \ + -lm -o FR_Math_Example1 +``` + +## Dependencies + +- A C++ compiler (g++ or clang++) +- FR_Math source (`../../src/FR_math.c`, `../../src/FR_math.h`, `../../src/FR_defs.h`, `../../src/FR_math_2D.cpp`, `../../src/FR_math_2D.h`) +- Standard C math library (`-lm`) diff --git a/examples/trig-accuracy/Makefile b/examples/trig-accuracy/Makefile new file mode 100644 index 0000000..f81e4b0 --- /dev/null +++ b/examples/trig-accuracy/Makefile @@ -0,0 +1,57 @@ +# trig-accuracy — self-contained build +# All artifacts stay in this directory. +# +# Requires libfixmath source at ../../compare_lfm/libfixmath/libfixmath/ +# +# Usage: +# make Build the example (fails if libfixmath not found) +# make run Build and run +# make clean Remove build artifacts + +CC ?= gcc +CXX ?= g++ +SRC_DIR = ../../src +LFM_DIR = ../../compare_lfm/libfixmath/libfixmath + +CXXFLAGS = -I$(SRC_DIR) -I$(LFM_DIR) -Wall -Wextra -Wshadow -Os +CFLAGS = -I$(SRC_DIR) -I$(LFM_DIR) -Wall -Wextra -Wshadow -Os +LDFLAGS = -lm + +TARGET = trig_accuracy + +# libfixmath .c sources needed for sin/cos/tan +LFM_SRCS = $(LFM_DIR)/fix16.c $(LFM_DIR)/fix16_trig.c $(LFM_DIR)/fix16_sqrt.c $(LFM_DIR)/fix16_exp.c +LFM_OBJS = fix16.o fix16_trig.o fix16_sqrt.o fix16_exp.o + +.PHONY: all run clean check-lfm + +all: check-lfm $(TARGET) + +check-lfm: + @if [ ! 
-f $(LFM_DIR)/fix16.h ]; then \ + echo "ERROR: libfixmath not found at $(LFM_DIR)"; \ + echo "This example requires libfixmath source."; \ + echo "Clone it into compare_lfm/libfixmath/ from the repo root."; \ + exit 1; \ + fi + +fix16.o: $(LFM_DIR)/fix16.c + $(CC) $(CFLAGS) -c $< -o $@ + +fix16_trig.o: $(LFM_DIR)/fix16_trig.c + $(CC) $(CFLAGS) -c $< -o $@ + +fix16_sqrt.o: $(LFM_DIR)/fix16_sqrt.c + $(CC) $(CFLAGS) -c $< -o $@ + +fix16_exp.o: $(LFM_DIR)/fix16_exp.c + $(CC) $(CFLAGS) -c $< -o $@ + +$(TARGET): trig_accuracy.cpp $(SRC_DIR)/FR_math.c $(LFM_OBJS) + $(CXX) $(CXXFLAGS) trig_accuracy.cpp $(SRC_DIR)/FR_math.c $(LFM_OBJS) $(LDFLAGS) -o $@ + +run: $(TARGET) + ./$(TARGET) + +clean: + rm -f $(TARGET) *.o *.gcda *.gcno diff --git a/examples/trig-accuracy/README.md b/examples/trig-accuracy/README.md new file mode 100644 index 0000000..8406c62 --- /dev/null +++ b/examples/trig-accuracy/README.md @@ -0,0 +1,80 @@ +# Trig Accuracy — FR_Math vs libfixmath + +Head-to-head trig accuracy comparison between FR_Math and +[libfixmath](https://github.com/PetteriAimonen/libfixmath), +using IEEE 754 double-precision as the reference. + +## What it demonstrates + +- Sweeps sin, cos, and tan over 0-360 degrees in 1-degree steps +- FR_Math calls: `FR_SinI(deg)`, `FR_CosI(deg)`, `FR_TanI(deg)` (integer degrees, s15.16 output) +- libfixmath calls: `fix16_sin`, `fix16_cos`, `fix16_tan` (fix16_t radians, Q16.16 output) +- Reference: `sin()`, `cos()`, `tan()` from `<math.h>` (IEEE 754 double) + +## Output tables + +**Detail table** (one row per degree): + +``` + deg | FR_sin LFM_sin ref_sin FR_err% LFM_err% | (same for cos) | (same for tan) +``` + +**Summary table**: + +``` + function | FR_max% FR_avg% | LFM_max% LFM_avg% + -----------+--------------------------+--------------------------- + sin | ... ... | ... ... + cos | ... ... | ... ... + tan | ... ... | ... ... +``` + +## Building + +This example requires the libfixmath source tree at +`../../compare_lfm/libfixmath/libfixmath/`.
+ +```bash +make # compiles trig_accuracy (checks for libfixmath) +make run # compiles and runs +make clean # removes build artifacts +``` + +Or compile manually: + +```bash +LFM=../../compare_lfm/libfixmath/libfixmath +g++ -I../../src -I$LFM -Wall -Os \ + trig_accuracy.cpp ../../src/FR_math.c \ + $LFM/fix16.c $LFM/fix16_trig.c $LFM/fix16_sqrt.c $LFM/fix16_exp.c \ + -lm -o trig_accuracy +``` + +## Expected output + +``` +FR_Math vs libfixmath — Trig Accuracy Comparison (v...) + + deg | FR_sin LFM_sin ref_sin ... + ----+-------------------------------... + 0 | 0.00000 0.00000 0.00000 ... + 1 | 0.01745 0.01745 0.01745 ... + ... + 360 | 0.00000 0.00000 -0.00000 ... + + ============================================================ + Summary + ============================================================ + + function | FR_max% FR_avg% | LFM_max% LFM_avg% + ... + +--- end --- +``` + +## Dependencies + +- A C++ compiler (g++ or clang++) +- FR_Math source (`../../src/FR_math.c`, `../../src/FR_math.h`, `../../src/FR_defs.h`) +- libfixmath source at `../../compare_lfm/libfixmath/libfixmath/` +- Standard C math library (`-lm`) diff --git a/examples/trig-accuracy/trig_accuracy b/examples/trig-accuracy/trig_accuracy new file mode 100755 index 0000000000000000000000000000000000000000..828f4ac4597747e588ba1f7c82db41beb0295113 GIT binary patch literal 36960 zcmeHwdt6gjw*NW_=t%-dASwogpkfp7h2pdI5)@kqK1x8X^brtHAUsUKSCtZ~wFuZs zb*fc+h3R9I+L?BSwzkub(01nHqjPm^t)1UYi=EEBp!PAXwYDYY_g&|lXtd6K{N{H* zzdwFCpU*jGuf5k^d+oK>UVH6*!scJU`}BtpB7tzJa7W^v5lr+DnUEqHfja}Y!*OTY z9rN<$*%xu{^_Nun`m7&8Oy?{R9gf_2cjaDREY-JOpGOJ|R>*fVhQm=)ysm~*@^vFU z50IH&&3L(-VeZ$}D~LQ-(?~cR)itY^S6|J#k=~`rGQHar42J#bi3&Z)P`VtB!m7&Z zYYg5vgw8Cp|4U%`tb;!W>z=l0|M+XoD_a`79hjG-4P@_2fPoF$j!O|5*Mc zei@XAm(q34A?V%1bUZgO$?eX-&1JIO=L18y#tZn#SzldKTrpwJ1n^b59EiBM-Xz>t z>m{X#H`XgmN}79{XIt zk-!l?p{l519y`;gC(^tK1grV** zq|fXb)p9ACzSRIG+^y=N{X_89NohmBMDH2U8%|!SeADEL)=iQ9t(y$}THu7>?u{dF 
zyLwpvg|*~8x01kz)oT*-W6#}U_3EMuV)I8nw=f($Yhzj$o+~6T!%=`+Hx2C<>(n`Y z4{ozt&i>Rb)*jqD;`no2>fhfUhgT4)ghmGvCdqH{FSk3Qhq_~5Y9*2t!RU|jU4i}pBx+$mMWP*Mck=JyB;mH0^rjWNDbz9X-ZYA$FE$!@?ZuR#3La4a-6jk&%kXFNn)I;01aG82o`(}QpBL9d`asJo9S$Ykp zR>9f8@8Qtft*|}xZM;CaZO~m$4sahvox9~a(wKXCR15r~;oPQq6+I6N6@M3R{^*us z&~4k43){AOKcBLoq9+S)OMdM6l~yl>SH{|qt{z1Co<&w~GtxbGSiSM){Ma^(kJe50 z{&RDI2m5T@6w&Xrl)_Hz{T53_ERW-C&`yI*M$NVA>z)mEwL|uwRa?EiaaM2rUk7-W z`GWe#w%C9tr*CPw!c<()CyuP6n(@=jh-CpB_FL-7>*f|Gk8kUf@eK4sx0f_WAG(2`yuMcn9GJMwFm$_Wt;&lGt;R zus7I6&tReAv%#|O5#Jdr`8mmdX~#it=+$75UszDa5) zS8AsR?X;>F_n%Wk|33Urui^J&oJ;Z+3ctPt{(W)9=eOooz~8rYsvT!U6@0BAWQnG3qs2-t zks6E|^HF1JI?~BojSa$;X?7wHV zjnn#KlM&OEK`gRni}hl*rrVlr@rGw>x65>nAAj85#B_-1c#Ibw-fVX&bj`_)Mh$JZ zTax!1wY6fTt~M<4#PEmgw%`VxGZgpu2AxgNWr>!MXffWZH>YLjXf{RsSNg9xeZ;O6 zG`b7Jn(fZ6m)*MBq`gjW{%NNm>dI@QBAaTnbRpwg?asE4Xj`oz$FU&QRg&CbJa*|w zgspywUWdAIg4S7rz9$M5fJ0;$veE*Mt6!3ALs|#^oiFY;T0qx={1Ct>b7j1^)mZvs zv+)?_L~X4$+BOt@8PTB2NNC9VQD0KXjX6J8Lk)J&$%>;0wX#+SPm*)ERnIbie_&v^ zCx}w~?JiZZJJ?^gzwNP2iHjTZZnP;Kc&JO8?6TE27@vbZCBdF_wa-B>?$Z}xZzs@4 z)(6{FMskx;Qw&?G56VszL{~57Jf9rQkP-Ze!Hj+*Za0YI)3U|d5!o6cNYA(>!0l0s z?aaS-!;yntHMOJ+4D1q>zIZou9e9fSqoI^bi++RHNI+~vE&o~HA~CM&ROBjIvg04_ zTbx(zY6hQ7k7a!e`k^^0kDWyH!=d!~JjP2I9qI7GH}Uvm9V5nM zyB-~kwqf@U*m`FWzv@jhPMT_ zZ$r5bwx2a#6RjKBq^m<)^Dzc37^@cWV*x)F@MG4Cy106B9fN$C_F##Z7wn?uxDm@C zgN9zPn>F1=?t?ULgB$XEw#Vb;|B8JHqq2QtNBfh;|8(qFF?Qnm)vgnYeTmRvgCaxe z4Iix$hGv_CgY-W8vI!*FOa|;LRU;-h3_*Y4!!>BT7k1SRyAl-~5q6at9F$xVM!CJe z-e&I}IO6V`>+?~f&K7LaJA$o}O(sI-$1iWPXH9N(&)4j;AA0AA8*3+nb{yu1`VrYa z+-~@}q)(dc#~+8EJMp-^5$#=!-f16Kdc@rvkI5sRU~5`uS#q5$oqe3t31X<_#fXG_t|b6Y!~CmMY*0ljJ*gkzRMU#=CW3dKYu@Y zHTD>DE%wmVM&=yLZ?bD&ZdKxr{L@;@wU#z*wD$Co2%cAYTx(oQvJ=x1vNeac*&Tw$ z*#NzX^-Hofk0Q3Iwd}<_*&=%H#2QS@V&4-NM9B@j#{6wVo|s0uKFsZdI4yqPeEF;36G5$WykFR)H&KPONM>nI;{j=vrVKKKae{SCLILyU3OollQU zvu=>TydwWYdGQ?YtNSM}WCq?fkXs_;b^&rrg4}GFt8L!pr*)89<}zYob{n`Dg?08t2alrU-WGQQfz4+_urei{h#|?-Zdf(e(->(v# z7hxX__eJGwy-$d2yiXTtfSkDfHy{ttw`;|0_CkZM&&-p&x_NX;dT;zamTCs=Y 
zfNr{A?}#IlS*{w$^&8d=_!UndaW|+$%+odHiXyI%$WjW#V)5xXW^T^7M( zJcD}XjV5XA`^J?IhA-IBx2fQl==%k+YwI4a%V~OX>mIfRGqxwOua>Cge9RFXpG&dq zi;Yq&e0k{`h+`1zZBQNbZlab;;7zh8_&5t>&${)^hgpbev*M2%L$JLhL?H(CM0Mg~$aT zl&^~uLY>a8W~1f3X7HhNCP9Zu=&KI8NflI(A?pZZ7x5X>BE%;{@Rsv1{~^t>m=p&g z=1b+eur-n6zk(4tVsNHT#5iEPWdC(Q73mbs-NsVY&@7GVsFBx-8py;VL}vM9m6?EX z6Mx27cRSW1SkplsS^HIjn|KYQv|S3??4S3;CyDRDf5Si5%_7$U@Ou<;VtQJPPitt< zLhg6k2c5Giy{(cCzX*72Y(~6UM`%X}dK%!FfQPls$#_3J4e-psE1iYy==h^X?}j8cEtY8qOux+G>lyU?<^5rEz$=i?jt|dG3 zK2i;Lc0afab5vN)QKG45?vKm)1~;$A;`CNm6LcJxW_7&q7<5cv&mO|3phtQB&$ z4h&4?eW`Qm;q7IFy^Rp8(KcFl45)@V7Yu@IA8_q3g?8_Wqa@e-!IWDvhqC(=d6)eY zW$U0zrx583!5VWMVy>oy>Upcd zh&^^gpI;vi!Crd7zsUG(m*@lACD$Xg69N2lDE}CHH$l)5zAyZz{0>7sU!89dFLpw& zN5F3r^3Obo#)5DflzJwii9dROFe1>+@21j(f45V_9b=XmCuPiMb#N`cAA><yzmG#NUJ9(O$AA?S|RjlP&<9OiYVpNP7N@b69` zB#Y@HUWY%)&^d>~uY*QrJm`eNw~x%wN_C;wD}eINEg_b@}aE7is8#9p9e}CfFw998h~A(C>B;>$g;_uPxB^ z0r(-!YSL@RbVC+cKbtVGxyY4SkAAL4J{`70H#%EwRF36dih!?v!C4U1QogMHunly| z_$`DS&#cuTUcc)!?_V~gA-;fqV-n+cbt)Vu% zIL5q}y!V{uJssxThxaeQYuJPMeoyO7@owg;TL)1G`)H0?cnnU#2 z2cCGs-tfe$?mqb2#M%RUzrkL6>NdfZ=`B3n0NM0DuvnLiG5kDi{fgSqPC*`fzAE)h z)L{90+0>vb)BCjKDnxm&7JD44W2Jd4K8O}=MLTnYY0;>dwuSNgo?2+pBZh!n9C~sU zFm32nEsu{Eg3fi$E{u74VSY+$r8TCYQm|4b?Q07YZwm?IIdE#N7`|+b?dn))pHT+-@=EY#_wFxc5 zz*lQ1b$q1`!u+Ue{|0jZ8glO*^`_AQ*)?NT-``;rYnQ^lqIDag7mm3ut79a+w4y&};N$r_Cg<%!{Livc^7W-U=n1~D zME5_Zr|HnsTFm!6XR)3A8hY3ZJ=_C5Wa!?99)7LpA?B(ceEju3@aw$;iXOgE^nmrA z6w3`)kI;51eOV8keRrbS{>KxC+;@X+uXeajgSf(y@UC(AU{PzXorSg6@*N_tnV*S{ z7Q09feMfRTGY3<4Dr~m|I__3Q>V{zqbFAUrFb}#+hfe20w{xK5G<>W0oizS#jIpf( z>sXLtpix-M(jbrB1KKo1elG@3ypJ{%_P}%OP{5C?J@%5ZXN;Oqib%Xy(Cbh~7fMr7 z6X3((Yq!8)_F3w;J2$E6&H&j&x2+K^}`@-0G=E^DN9Mg`JGEaaLC+hN{) zb1LK4y$YWSyIBxyo%tN_>Od<4unuJ*xNRD0SqPes)`mqtkMd82@w&Qray55tGdh3U zWLz-RI^!3}9{~Nopl`e;sSB3(yBce)J8YPHFju_L?22h=TK3#wTj=pU*t^x?w+jYo zZHhjbhHjPC#u~ymcdYj;-Z`hS-*iFp-{kp226O`7(uDoA3m2v|>?sY-Y&t)saSwi> zaU6T8e-nW(qHkFLUNHS+t2AerH^%QJlWFe-o*VIvfY*z?1CQFTuER-iO7PUH>vEU4 
zdB0H<^meSfBiT2L>+pRc&6;f^tOEsn=a_^wAJz-!K_^wvIxSeIC7}%8XtH>(SLA(6 zT|I6k-iJ`4eYn4V(p#}9g4Hbj&B=PZjJX@}$*Usv$9a7z zK7{_I>Fq3gUSIiWSyj{#M>Ml!<5OCc=ft&=h3D5qO&4N3awUR}HiTZ~_4ypEN5s%K zjbb?H0w0-+$&KE@a*aa}2`; zuswYDcvbtf)sn$-<$iY;Poy05s;B_PKspq3^9Oj=imZJeO39>TJa(&)= zKwajE`^rAd2TSYC5VQ$=?l;=i`P$`mDz}@Vw99?wxJ4ALuZqam)DFS^&jIaK9ldx> zM`j|M1#A37fou-mulT@vKgZJiO@!sZmXPjR4nDQ;4ZLrs#oChNwM5M^#HR=FeJ=Ynf4fP-It_N=hE3)& zUL9=YAkt36DsiDPZr&%#hu?soi;&hUsw>mS>pSy~fXC-CmM!t`7@dfh$72j0d`67^ zdHnG`#{_NDBUp1K;a7z)C-!V%TQ5l8_A-L;UCx_!8oIObUJmb7^L`ok&wtJC zn`7QW+*hJ9*jNsTNwE*YYb}kFu(#yDRu@CMj8rc~Go4r7BuoD8aK{~9AC0d4A4pbZyM~_ z19qRodbJX~a}3n;O>+dpWq{8iwkp9pl7=P~U=NsolTZX$pvWxil!xtz4R_&Jx4IkW zznjIk!>qwM_<85aZ&cLMo44jLe43N@R5Oq#3e-Z&)*PM>y)u6#+Y#$dY?tmi2cQ0t8KKA)|A530hadcre|jyTL=4#Qe&KNSPPpJoYH)w8{Wiw zm||%u@**WWJ>av`^=l8~=h+61@%dZ)yTX;}&`nl7^z#;YXZzy5>_M7!x?6fv`?v8= z*g3}Z#cP6K$YPzo>{n--Qm4sRr?|26gMhnLBe1@Zzn$`T zd=Y#b1k7XLJ+jnt_!`gCVJqp7#azf_4rG&txcRqI%*?((gfH;(RgciX__YSqB<^-_Zg3(y#vXQqD^gLg?C`;SFlH( zzs-mRnOEO$L;jwQdl17&d+V@E-d~u~|F}H|GQ>~84O7sknRs`<)Qq^G)y>~TDu5G- zeT^xc|LGQc!w*NKeC%f;-(zQ8H$>#xhL5L+vzRy~Te+KU#UR;5FY?%bMJ`)=cocAX z@9ihpSN+u-t9}{kUxu#YQ^RxkcYL=hdgXOYgrd(#_|Lc#gQf4~sj1Kf#?VI#$@LD} z=P{I84Ld^`>mmJTkVOa9iu~P<$IvaH!DDdf-O~?W<74_^9%Fx3--unEh@Woeqvg3_ znd=~Uz?#U;bc8WG>|KLe-62X_FGY|(j+VK`U0r|3u>|A82Oz%W-@ja|cUJihgL>I< z&WXbEr>0$lo}9u}=?tfxk~-nesPMtf_Y5cb8XvioXwy z^H^Q&pv86};)d^;IvhG;n1A~y5=Bt3RmXS_qMruDEX>O;(B(2wDWf)zvGn8v zMC@y1?~lcA1kf+WZRY-?e#y%_FmGM|O|={P$3EE$;VSF5q>4Kur3d+?wSMU^zjTD0 zjvAky`+dB|N_=@R37j8a|HLlWYzekLYL(vG*T`=G1GxHee~bHT25@1WGH@C9CEScJ z;8x*=t>Su27^8kUC}YqEgPof1wapJ>B=df8QsL zi}`;m_;UH@g?&XI7N1=4#>$_SG?!MEWjV)`e_MX6VppZ3YV@kVtomoyVz*fRa&>-9 z@akt*&sp=qn)0>ZuYG8pc75aeLHF0*|M~s*Zus4X#En}wezI{^ZDZ}3+Smv3A9(VC zGY@F%X4EaOd${grb#K;vPYHV!?JJBd3f=urDO9Ry63a|3JX3dbgu|0?QoV=PIRejde?lo z{;wOE1A&gI1lk)cPz{{P5)6pcFo7oN1xm!^Fm5=UFn%?eF;bu+lRypFTHSpx?C$ zbmuby{W}~_BfAPp&39ww~yGEyhSa z{^Vl|(V#}6@3s+r+=9QO_zCDfL3H1fM6qo|?>>0)NBuE20lxCR+9y 
z(Wy6xtX)J0-Xd~z*CDGtP{RDQaFB))C&1q=L{jwPEc73e2Q_)QX4@}vpLD^Tf%w|P}nd9j7NFQ-5c7)*JFrRYT|r z$&;()hhJJtD^^z))|6IN5~NU3ur7h$1#4C&5UWPY;DnT;&>EjK9yBLcE6?j7t)IC3 zlHAx}%N5Q1(AY9g)4H2WX8(0I=3ii!gFdZhjVIGEyV>I1#l_X*Ox4L#CnQfZRjsaBwYtUv zMy?H_Stg98Psm=pqS#$rSy&8~GUv^k zXPQ1W#Z*+adU<)V$&VyyVaBH7$|5#wY2*{(TdVXpg(-OR#c!UY`31iagm)13P=eqV zg0SZkXhnv4f7PWxXxzlYEm+9y*QbOm+FMN-{{O|MmOH2d%+^R6&bog6~K z>+rcLdJ^8n#@x6_3K};Qe}_Aigyx~7?m)oQYb4c-Xj1Qsp~2f@DJTkoeg7B|W+V_j zod9?&sduu!x`l)iGtnwDskWOb_-zZ(#zYG4NFw$2aiscW9NI>wo5RgDX0s#pGBoL55Kmq{? z1SAlUKtKWk2?Qh%kU&5J0SN>o5RgDX0s#pGBoL55Kmq{?1SAlUKtKWk2?Qh%kU&5J z0SN>o5RgDX0s#pGBoL55Kmq{?1SAlUKtKWk2?Qh%kU&5J0SN>o5RgDX0s#pGBoL55 zKmq{?1SAlUKtKZjV-hfd@}0Q(oFg1bBAt7Lmvk}gyP|vvyrr}i|Gm+AEtAg>5O9Ei zFbX%$KoQb$Ghv5=PjJBjE)M)J`{EjMtipe^uUUcj>YBom0=JZ?CO&xRDmB4Dmp~qe zqBSSu@yR4S-o=4as+@uv3ah3_Ql%9YF`!wnx}0#P;Opana)^<;r{d8(4UZGE!HAXA zd|u8p@Ibe68}roEF$<-=S%PClL3Paxx(@)*XnPtS9XQKMl{|@7Pp11xRY|L=R#Kgs zc4wmO>})&^GuFOzJho?m!*tp!+>ZbHok2K&OI5XYGCeME2N$lcpmqT~O~v_Ex0kM? zQG5grB^S_wlJ%?b@9qoBsX?e*T>+_3i?E6g3)L=n4IN`4g7_DA<8kp$JkI8ETle76 zdpA1NqM{(~(Sxd@B6?WG{c2Wm#~xSVUl1|wz8pOI9eDIC<`h5PUJRni)97DRQzp|1 z)eJfX?CnJ;=~xb;|E3~Uv3nJrQK7pRP;{6pZN@oXDjeQbuZEDfsMT~Jo9X4^@$?cr zb}tniIMk|Oc~yAgi%QqfJ~bGAR$X4Tax%T5 zt^ncJ)!gqcb@l4y^gA^k$3dwzk89C>6?gTpifg{1DqgpW-c*&-B~=AAsjKOvn)_Xk zb9iCC<*1eyR8Vga&LyHxk$*87Cio>1SE7J$Wf&fB8}T@qsG#l$X4Kumu)L8#eG-W; z;(*YB<8&li9TLK;$lo^#2){w%APSWTu@Z)cF^0BDR{UoM@^N3JFztwXCnE4 zM}pZto(pCo6&J3qDRz_; z7ZgEIKL{(RXHZ1}D1FYO66jZS+l5spgndHDNufh^NcDQ~71c#yU8_3WZ1M!fq#p>{ ztq%E<&>@)3pQtXW8-&fO`5}gI!~DC_=NrO%1yjtVakt&mBit4e6JxIFSB1>jE{LWM z)nVbV>Q6#$vwR}SJvfL2NAHn{L-RR@55;A8AI>>qc(Vc*vA+Sli(%Z`aTqDX&nfU@ zIKYYFBMNN5m}K~U1&*2~!=Ec~%ybzJ#sMf{IOvHyGCa};cgpZo9}H?tZ=nzN$nZ)Z z+$qC#J{V^}F+PvTg%zO$y8h<}QYIhIc9OS*86}1wL$&%YUxGF{5Pobp;Mr>K{|!LrQ;6OE61G#{YK-#y=&;pyT$x zQs8o&dByM#3VfGRu0hUT)swu*D3ztAs-BP#_jJ^ z;Mc=sc%K5NEA$R1@SQkcjqy7aI7TnSuPg8uIOL7XyA`-Y8Gr98@X4`q`Ck-RRNyZZ z80WD`^}PywNf~ci_#2kzIHi2J0!91OgEAyY3bPzH 
zndcnxp@K5oN*ZyCFI(z_k}dV)S{f|KfYf6-$svo9%W;d6EJu`uFw74^Jmv?@xC$T; z^W{tZmZ?@C981#<@0&a&C zhe)B8V}%0wBCe|$Qs8wJ^2J}AAgL3zz_to0mPhV{x zk9c1#=d`a}iihM7i^)kKM_N~bYJ_Lk0EB5*0S?3b0VQU`*`RFd|BWS&!;v>XIfXms z##xtg3c1uR6RD) z@WyAi&S{Ig^rsnXPOO=AQGL&^bPK}X7dAe1dd_;ZHTMeck^3XPbv@>U{d&GA`_U?89MuVpcR&*6NJfyi;@2YIlc~ZhCiF+M$@X zmxjNtALxCnCZX`yo(sWC3qLLCpIf#w^oQ?<@7$XC(1^wvuT1FddiLzcKfmqSu0IrH zKJq(ANu1PO&MFq{p=qtfA?qO`;%1jXQV%Q g>y-!6e$o8F;~#x$+B|sBT5rPW3tQe?Elcfx1ItrT1^@s6 literal 0 HcmV?d00001 diff --git a/examples/trig-accuracy/trig_accuracy.cpp b/examples/trig-accuracy/trig_accuracy.cpp new file mode 100644 index 0000000..189367a --- /dev/null +++ b/examples/trig-accuracy/trig_accuracy.cpp @@ -0,0 +1,139 @@ +/** + * trig_accuracy.cpp — FR_math vs libfixmath vs IEEE double trig comparison + * + * Sweeps 0-360 degrees in 1-degree steps for sin, cos, tan. + * Prints a per-degree detail table and a summary table. + * + * Requires libfixmath source at compare_lfm/libfixmath/libfixmath/. + * Build: make ex_trig_accuracy (only built if libfixmath is present) + * Run: ./build/ex_trig_accuracy + * + * Copyright (C) 2001-2026 M. A. 
Chatterjee — zlib license (see FR_math.h) + */ + +#include +#include + +#include "FR_defs.h" +#include "FR_math.h" +#include "fixmath.h" + +static double pct_err(double measured, double ref) +{ + if (fabs(ref) < 1e-12) + return fabs(measured) * 100.0; + return ((measured - ref) / ref) * 100.0; +} + +typedef struct { + double max_abs_pct; + double sum_abs_pct; + int n; +} err_stats_t; + +static void err_reset(err_stats_t *e) { e->max_abs_pct = 0; e->sum_abs_pct = 0; e->n = 0; } + +static void err_add(err_stats_t *e, double pct) +{ + double a = fabs(pct); + if (a > e->max_abs_pct) e->max_abs_pct = a; + e->sum_abs_pct += a; + e->n++; +} + +/* ================================================================== */ +int main() +{ + printf("FR_Math vs libfixmath — Trig Accuracy Comparison (v%s)\n\n", FR_MATH_VERSION); + + err_stats_t fr_sin_e, fr_cos_e, fr_tan_e; + err_stats_t lf_sin_e, lf_cos_e, lf_tan_e; + err_reset(&fr_sin_e); err_reset(&fr_cos_e); err_reset(&fr_tan_e); + err_reset(&lf_sin_e); err_reset(&lf_cos_e); err_reset(&lf_tan_e); + + /* Header */ + printf(" deg | FR_sin LFM_sin ref_sin FR_err%% LFM_err%%" + " | FR_cos LFM_cos ref_cos FR_err%% LFM_err%%" + " | FR_tan LFM_tan ref_tan FR_err%% LFM_err%%\n"); + printf(" ----+"); + for (int g = 0; g < 3; g++) printf("--------------------------------------------------------------+"); + printf("\n"); + + for (int deg = 0; deg <= 360; deg++) { + double rad_d = deg * M_PI / 180.0; + + /* IEEE double reference */ + double ref_s = sin(rad_d); + double ref_c = cos(rad_d); + double ref_t = tan(rad_d); + + /* FR_math: integer-degree API, result is s15.16 */ + double fr_s = (double)FR_SinI(deg) / 65536.0; + double fr_c = (double)FR_CosI(deg) / 65536.0; + double fr_t = (double)FR_TanI(deg) / 65536.0; + + /* libfixmath: convert degrees to radians as fix16_t */ + fix16_t lf_rad = fix16_from_dbl(rad_d); + double lf_s = fix16_to_dbl(fix16_sin(lf_rad)); + double lf_c = fix16_to_dbl(fix16_cos(lf_rad)); + double lf_t = 
fix16_to_dbl(fix16_tan(lf_rad)); + + /* Error calculation */ + double fse = pct_err(fr_s, ref_s); + double fce = pct_err(fr_c, ref_c); + double fte = pct_err(fr_t, ref_t); + double lse = pct_err(lf_s, ref_s); + double lce = pct_err(lf_c, ref_c); + double lte = pct_err(lf_t, ref_t); + + err_add(&fr_sin_e, fse); err_add(&fr_cos_e, fce); err_add(&fr_tan_e, fte); + err_add(&lf_sin_e, lse); err_add(&lf_cos_e, lce); err_add(&lf_tan_e, lte); + + /* Clamp tan display for readability near poles */ + int tan_pole = (fabs(ref_t) > 1000.0) ? 1 : 0; + + printf(" %3d |", deg); + printf(" %8.5f %8.5f %8.5f %8.3f%% %8.3f%%", + fr_s, lf_s, ref_s, fse, lse); + printf(" |"); + printf(" %8.5f %8.5f %8.5f %8.3f%% %8.3f%%", + fr_c, lf_c, ref_c, fce, lce); + printf(" |"); + if (tan_pole) + printf(" %10.1f %10.1f %10.1f (pole) (pole)", + fr_t, lf_t, ref_t); + else + printf(" %10.5f %10.5f %10.5f %9.3f%% %9.3f%%", + fr_t, lf_t, ref_t, fte, lte); + printf("\n"); + } + + /* Summary table */ + printf("\n ============================================================\n"); + printf(" Summary\n"); + printf(" ============================================================\n\n"); + printf(" %-10s | %12s %12s | %12s %12s\n", + "function", "FR_max%", "FR_avg%", "LFM_max%", "LFM_avg%"); + printf(" %-10s-+-%-12s-%-12s-+-%-12s-%-12s\n", + "----------", "------------", "------------", + "------------", "------------"); + + #define SUMMARY_ROW(name, fr_e, lf_e) \ + printf(" %-10s | %11.4f%% %11.4f%% | %11.4f%% %11.4f%%\n", \ + name, \ + (fr_e).max_abs_pct, \ + (fr_e).n > 0 ? (fr_e).sum_abs_pct / (fr_e).n : 0.0, \ + (lf_e).max_abs_pct, \ + (lf_e).n > 0 ? 
(lf_e).sum_abs_pct / (lf_e).n : 0.0) + + SUMMARY_ROW("sin", fr_sin_e, lf_sin_e); + SUMMARY_ROW("cos", fr_cos_e, lf_cos_e); + SUMMARY_ROW("tan", fr_tan_e, lf_tan_e); + + printf("\n FR_math: FR_SinI/FR_CosI/FR_TanI (integer degrees, s15.16 output)\n"); + printf(" libfixmath: fix16_sin/cos/tan (fix16_t radians, Q16.16 output)\n"); + printf(" Reference: IEEE 754 double sin/cos/tan\n"); + + printf("\n--- end ---\n"); + return 0; +} diff --git a/examples/trig-functions/README.md b/examples/trig-functions/README.md new file mode 100644 index 0000000..bf5b31f --- /dev/null +++ b/examples/trig-functions/README.md @@ -0,0 +1,55 @@ +# Trig Functions — Arduino Example + +Demonstrates sin, cos, tan, atan2, and angle-unit conversions using +integer-only fixed-point math on Arduino. + +## What it demonstrates + +| Feature | Functions / macros | +|---------|-------------------| +| Integer-degree trig | `FR_CosI`, `FR_SinI` (s15.16 output) | +| BAM angle conversion | `FR_DEG2BAM`, `fr_cos_bam`, `fr_sin_bam` | +| Radian-native trig | `fr_cos`, `fr_sin`, `fr_tan` (arbitrary input radix) | +| Inverse trig | `FR_atan2`, `FR_acos` | +| Angle conversion | `FR_DEG2RAD` (shift-only, no multiply) | + +## Hardware + +Any Arduino board with a serial port. Output at 9600 baud. + +## Building + +**Arduino IDE**: Open `trig-functions.ino` from **File > Examples > FR_Math > trig-functions**. + +**Arduino CLI**: + +```bash +arduino-cli compile --fqbn arduino:avr:uno examples/trig-functions +arduino-cli upload --fqbn arduino:avr:uno -p /dev/ttyACM0 examples/trig-functions +arduino-cli monitor -p /dev/ttyACM0 --config baudrate=9600 +``` + +## Expected serial output + +``` +=== FR_Math Trigonometry === + +Integer-degree API (s15.16 output): + cos(0) = 65536 sin(0) = 0 + cos(45) = 46341 sin(45) = 46340 + cos(90) = 0 sin(90) = 65536 + ... 
+ +BAM API: + 60 deg -> BAM = 10922 + cos_bam(60) = 32768 + sin_bam(60) = 56756 + +Radian API (radix 12): + cos(1 rad) = 35419 + sin(1 rad) = 55117 + tan(1 rad) = 101994 + ... + +Done. +``` diff --git a/examples/wave-generators/README.md b/examples/wave-generators/README.md new file mode 100644 index 0000000..a262185 --- /dev/null +++ b/examples/wave-generators/README.md @@ -0,0 +1,57 @@ +# Wave Generators — Arduino Example + +Demonstrates square, triangle, sawtooth, PWM, morphing triangle, +noise, and ADSR envelope generators on Arduino — all integer-only. + +## What it demonstrates + +| Feature | Functions / macros | +|---------|-------------------| +| Phase increment | `FR_HZ2BAM_INC` (Hz + sample rate to BAM step) | +| Square wave | `fr_wave_sqr` | +| Triangle wave | `fr_wave_tri` | +| Sawtooth wave | `fr_wave_saw` | +| PWM (variable duty) | `fr_wave_pwm` | +| Morphing triangle | `fr_wave_tri_morph` | +| LFSR noise | `fr_wave_noise` | +| ADSR envelope | `fr_adsr_init`, `fr_adsr_trigger`, `fr_adsr_step`, `fr_adsr_release` | + +All wave functions take a `u16` BAM phase and return `s16` in [-32767, +32767]. +The ADSR envelope returns `s16` in [0, 32767] (unipolar). + +## Hardware + +Any Arduino board with a serial port. Output at 9600 baud. + +## Building + +**Arduino IDE**: Open `wave-generators.ino` from **File > Examples > FR_Math > wave-generators**. + +**Arduino CLI**: + +```bash +arduino-cli compile --fqbn arduino:avr:uno examples/wave-generators +arduino-cli upload --fqbn arduino:avr:uno -p /dev/ttyACM0 examples/wave-generators +arduino-cli monitor -p /dev/ttyACM0 --config baudrate=9600 +``` + +## Expected serial output + +``` +=== FR_Math Wave Generators === + +440 Hz @ 8 kHz -> phase_inc = 3604 + +phase sqr tri saw pwm75 morph +0 32767 0 -32767 32767 0 +4096 32767 8192 -24575 32767 32767 +... + +Noise (10 samples): +16214 -8277 32725 ... + +ADSR envelope (attack=100, decay=200, sustain=0.75, release=400): +327 654 981 ... + +Done. 
+``` diff --git a/examples/waveform-synth/Makefile b/examples/waveform-synth/Makefile new file mode 100644 index 0000000..33a36a6 --- /dev/null +++ b/examples/waveform-synth/Makefile @@ -0,0 +1,33 @@ +# waveform-synth — self-contained build +# All artifacts stay in this directory. +# +# Usage: +# make Build the example +# make run Build and run (ASCII art mode) +# make run-csv Build and run (CSV output) +# make clean Remove build artifacts + +CC ?= gcc +CXX ?= g++ +SRC_DIR = ../../src + +CXXFLAGS = -I$(SRC_DIR) -Wall -Wextra -Wshadow -Os +LDFLAGS = -lm + +TARGET = waveform_synth + +.PHONY: all run run-csv clean + +all: $(TARGET) + +$(TARGET): waveform_synth.cpp $(SRC_DIR)/FR_math.c + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ + +run: $(TARGET) + ./$(TARGET) + +run-csv: $(TARGET) + ./$(TARGET) --csv + +clean: + rm -f $(TARGET) *.o *.gcda *.gcno diff --git a/examples/waveform-synth/README.md b/examples/waveform-synth/README.md new file mode 100644 index 0000000..da8e8ae --- /dev/null +++ b/examples/waveform-synth/README.md @@ -0,0 +1,76 @@ +# Waveform Synth + +Generates waveforms using FR_Math's wave generators and ADSR envelope, +with both ASCII art visualization and CSV output modes. 
+ +## What it demonstrates + +| Feature | Functions used | +|---------|--------------| +| Square wave | `fr_wave_sqr` | +| Triangle wave | `fr_wave_tri` | +| Sawtooth wave | `fr_wave_saw` | +| PWM (75% duty) | `fr_wave_pwm` | +| Sine wave | `fr_sin_bam` | +| LFSR noise | `fr_wave_noise` | +| ADSR envelope | `fr_adsr_init`, `fr_adsr_trigger`, `fr_adsr_step`, `fr_adsr_release` | +| Amplitude modulation | Sine wave multiplied by ADSR envelope | + +## Building + +```bash +make # compiles waveform_synth +make run # compiles and runs (ASCII art) +make run-csv # compiles and runs (CSV output) +make clean # removes build artifacts +``` + +Or compile manually: + +```bash +g++ -I../../src -Wall -Os waveform_synth.cpp ../../src/FR_math.c -lm -o waveform_synth +``` + +## Running + +**ASCII art mode** (default): + +```bash +./waveform_synth +``` + +Renders each waveform as a 64-column x 21-row ASCII plot, plus the +ADSR envelope and a combined sin * ADSR amplitude-modulation demo. + +**CSV mode**: + +```bash +./waveform_synth --csv +``` + +Outputs CSV with columns: `sample,sqr,tri,saw,pwm,sin,noise,envelope,combined`. +Suitable for importing into a spreadsheet or plotting tool. + +## Expected output (ASCII mode) + +``` +FR_Math — Waveform Synth Demo (v...) + 256 samples/cycle, BAM increment = 256 + + Square (fr_wave_sqr) (256 samples, showing 64 columns) + +max |********... | + 0 | | + -max | ...************************| + + Triangle (fr_wave_tri) ... + ... + Sin * ADSR (amplitude modulation) ... 
+ +--- end --- +``` + +## Dependencies + +- A C++ compiler (g++ or clang++) +- FR_Math source (`../../src/FR_math.c`, `../../src/FR_math.h`, `../../src/FR_defs.h`) +- Standard C math library (`-lm`) diff --git a/examples/waveform-synth/waveform_synth b/examples/waveform-synth/waveform_synth new file mode 100755 index 0000000000000000000000000000000000000000..815fdcee1cc60a5f962105764a5fc5fed89fada8 GIT binary patch literal 35808 zcmeHwdw5e-*7w>;3nxi|rY*M=ObQf}wnz&tw-K9iaeCp_w1RdVLud<4(+f#kxiz$i zm(apUMn)8zp^W+o2(L1O3OWk)_Lhqdh@j3m3Jx=GOT7#um!j0Zzjbl~fpIS1^S<+Z ze|$L)Cugs{_gQD{-&%XEz4tln{`Q+MzKbFfghPch24_nI(F0^cifAy-Y@BxcoU9*C zFPd(f&AHbf^3B(0{SC!@K14;ky>R-Sh1VC$_gk;eBZZDccJSmHc6)8<(ppXhjvM*4 zLw4r3b9kVfX>QlmBZ3;QrjfAQYijEj)LhNFk>3V$fZwf34W>i+NeVw;C?9rvaZRnq zS+(dYeSLe6D*P5IOqdQmLV0sO_*GZcTx;-+@>!Du@|{r!xG;?-|J}oGud7<(tSYfP zs}@$H){Xo|j1KTSpm1RNMwvpHvtA#=X}3?yD$KG^ojt1{pj~T#58)~wH(Xg8A6HUZ*!b3z78piDA;N(M6&LmvN;Ovd_?;R5E z586^_oxUJUIJs%8^l4TVop!tZ$$#OhrdhOEl>M>ueU$?u^`p(4L_<1Ir zfi@Xn90obf*pu|v2$o51AJaHsj*ZNR`%UP<&t>enEMN~a0Jpd%3*;$SR#RJAdCQbr zAeVDN;BLMzHT1sZbii@_eZ|Six1=dIjYM6RClz>R<7!c8#+Q$wIJw-~1V0t3C@%a5 z$Cc`rw|}!pZ11wGwqKm5-hO^|#P;qLk=whc3cn_OwGA?^k5*YPlPdFMPjBn7p4B6d z0gLsL#(G&2*2~+}hAyK-?qn4e#!;iKT}??DCh`x6q+_u}M{A}G_qRM~?K-WF-SM$R z8;nHm4Fk3qPWGa6Sv@_4TU6x!df*lVu!|du`@!SA{>yoRU7ZvBmpfDa-*lS&-*%e(Ew@ojJa``{PhZOEkw~9X(l;SZu0Xz^$7};Y zn-uypK^vrcQIHM#)N%4>p=>_VFQ_8)8?@x#qLR)fboUgVsv*CS8f~hGMw^;^wg{wu z$j?Cjow%~h7doTm`VJzw{ye4pE?iq6OMqYWxzpeEFivtCFXEmLdLEAH>Lj@huR)%+ z^7BijdMRG=M#mb(K$q0I(I8zXQ}ns5;87Dz<6tmHrT!!Q!2B{)e3WX3`1fUxz$7ePo3HG%!qPm;5T!IZjHxDN)HMIscttem(MmHP>wm=6%R{sdC;M z!Mya5$TQ1%uLbk!InN~L?F;6;Khkf5EOA!ISD?su^k)A|%A)&bcqUqt)qjF_Dyq-LuSyR3qCNs+>D7hkZ4?_lMi(EwQWQk5+_gFSyQ<@h zMRZ=YA$rGA5#8Lc+&n>wB)?IJa}Gs^7r^I*FQfefAot0)ll%j5z4T?0AHLw|pm|At z_}Zfrf1l)+`@y40{w+!5mSP{@w8cj5c9m}v+kP9a?el2+DfAB=+PR?@`CsTI{9ESI zxm9>hK!+A!3EcpOtKPJPa-rowi#P3n;R(y|wpvVEOW$L;td%G;M;qdx^Lt++=29<(`x z{s44#2z?xM*@_EG^^mzo;rTA|g6YE{>9&ycTOsM!k%oTR&&V>p2HF?$jF&;Tg~<3K 
zXsc+ny@cxvz;-|S^r|ie*QZkadyv0PUDO4Cy50hxW)arQnL_Gx2mvTyV%}8gPos3J7oRA&YQtUw%v#6U$XHvuxktKdPmO|+o7K4JZ$TSzuWKm9QV(J zZC5>bVClP>zPiykzw#z+{;GTa@h>%f^WGIY-EY19V!!mx(!HBc`gaHSZT^Yq+x(96 znvEv}Z5+I0`z})K`@S3OyZT?Z?*g>%M*H5j`bU3*y#V&W4VX^@mQ#W06kwYL%-@pn zzKwB0TO}d#BvYHgVnx3qpFv|jXvmy~blMt&PjrpzP)m6`K6}`fWZ&blcz0~JbbF4U z?YG8uh*~`61Z})VXmv(GZu4%Rp~Lu7+Ztf0^=IYhb|>U_qTXTDGYXw6x7BCZ2|g*{ z6Xo5xc`f)fZ`H*cg~nyw-E2U(jgv~O`O+#WU(;sIw{%D6Yu5+(96tQ0t(o}{^Jz46 zK6Jm$rSLVUH5oLt-eyVLV$gb}Se>_T?2&;F*m5I%I#(Z@qkOttq05sjQSnlORd3G9 z*3o2&VVnDB?bjUNZ_^5mu5-Xzo2&IjkItLC*#-T(gruwR4vKB|=INqFwb)!+qvCVD zaRv67neMVQpW)E?{V}<#=IV90*C@2EGPFHORDzBQ(7;Ly+PG?NS}xK$$nV;{#b5zn z3-Y5tqs*PXdyT=ld#&NnfJU3vtBud?kG2f<>9P}jd4JQEbn;-#Qm8>}1)n@4#dsvK z#$fCgZVBTG&9tA|EWk`l_uC|k2C=bk0czY?0#2hWd2UGGl?}R*KDpP0-W6 zA{pmstDZ6cpr>bGV+5s#;-0D49qcbZ)b`k>q&s{?H`kbCBvR{ z-e;i~&+*S;Z%5EZ)(6{Fc3QJRQwm#J6_F2rN=IcB|cMAt&==tZzm;GzSBHCkgGaD{VfD{!$KHve9O)Yl6)U;kxLj z7WC7kZnfSHUW`u%Fg%F5jE~Is1z!=z8m+fifgQM;fx8C%OpAVY82!wGewK@VmWzI7 z5!yT{Q788^@vWQykXFaq zyWyL-|FMn{i`cG*N8;JAdpm6XKm-+L3*#7LA9Anywx`gJz97-_F&JaV6LZ1wx0c-j;TQ(CFyb_O?rEzRkq0_V1D@GYFplz7S9aL6Skf2?Dt^q z7pF}@d{{L&KUlX7elGb;v+eMs@N-8VwKd^+pT}>or8xI{)+Qi$BoJ&(>ncxMYhXR+ zL03l7xHRBn0iR*3vX{5tyv4x%<}=s@^b*5;s_46(jcl{6vi<@#^rVD)O;-1OT))qb z*QozJc08Qz#dP33i{{WKIqt$|n~?Ya(pI^TOYlFzJ|48)t*~95t5RWO5&B+?l+bE0 zlDWJE{V&u{ULAXkxHk6Ccw-9=&2P48Uu;pv9rKTC5o;}5wei~H`(rq+a=+HN=jJD6 zCFW~(uCv*N#^r-_yar6_~x*@F9V?W%;gWpfsWt{z+YM_P%wtqz`|o$g}`5A@)Hg<2x#M z3hZy28f|t8x47I>DErNm5884++3(Q`t&ZhugU#i9vLE9vjV@G1f{e0!$5ieEY?l)w z+@~+{7`xFH8iS5^Umt^1kj;b`#&u5h?3b^h{^Dyj?Kmwcopm%HF(*_HSR^al4}Rxx z$~r_JXWa$$$UGZ*h2j*d+U5f(^)+|6kYyw{Z?I$J}X$1NN;!J1nqk z#%&vQjnnoyXvp2FiBI-6Y3D=d>|b1XrZ(!J!3tS4=m%WiNS5*z*h{FczPC>(vPzL@18*gYwi)TP50~yYI5Xd;lgCLyrRSI*)a@y? 
zMjjX0p_>-0KiQ$1R@ghnk!g&p2Dn~g-9TRH_yQQoQ>yY<9XRvpVu+mmv;s{2EsPkhQG$OOr;-lFrmekt81|}#8}-q7~gJD!GGhK zmJEzt)2wcbXf&L}edcso{5#9GLqb0tHE$!Mz%y2%tOV8}Y+wIrX7d4YwkAFhEs z#6P5YEGCbGFy_nTdtqx5kN+Hl3#7;#orHeCcFF#0n<~~NncECbRsTGV>7aqI z5wUqeta1|3ZxT)#8g9p21amszk+(%9Jj8Pt<=IZ~X8*heK1n(O{|*1#Fp1pTAn!ro z#Qe1ApVmGRv$)+^$6lXI)3#R8-rb-_-jtnS))Ahg13wMwnNSaNo97ck>S<8VjC#&V zluro<4c#lIK;K!IKffW5U0bMYd<#)J`hOB7v~Jp~3gE-G^S6Cjm3MW%k=SO)g*{jj z4jJs2U-LYm6653fD#{D?E9TK<{|%lc@J!g(3ao*=L4LLgwlhaBN@v?hmneNdeB_IB ze^#^w*Nt;G6n&~1=xV!fBVttFf`ddECzl%wF5%?)m{D(aH$%t9EUO#qjfE#yir7my zHC}yzVZ1+y^(n7C z#9oI_IruU@TvrQxSpR2xHx)6>>k-hq+X~yk`?9!p%xTK-EGO>KKu=om%Z2_W^qGb! z81ti_Fh-}|@ENfF9@i1lhMlmNcE~SreYQ)qf$fs-BRnSt^-rVxGpy%DKu35#@=xU* zfct{?T*7$q0Q9;a@-_o+))BqgSQN^q1M)i|zZrHB+HP*A24iH2*#0qB!x*P~#Z=hC z6xc-;)^OgG?U3gWMv`RTmlWAp#6Bo@ccUQ_dFIGfMVTDqcCX^GtSbjRcubyySda{T zCwWx`4anER_a~vxvM*VO{+rw<2Wg$_3yjZ_F$QuD&e2&k**c3JX9C7U&>hbkgJY5u z#C7JMgnN_V-(4aqkNIM}4u6uZbM=Q`2alWt@aY5JJ~Uf5AN`u`JXz@SD*EK;bdbj= zqFn1RZcD)&bB!K#g-(8^c@)-a5JS2C7VvFCog}ZWAQ>{{uxyH4PNB&&rDeY zpB5*s^?eC_8oo&FJchrf)U`#|2I3Q)a}ECedVQwzSFXbh((P<$1NS zxrr2mInncLkB#*=A53np7-hAvKFTo{ZL&1!w!?2_){(md_K0dRj49}}U&?e~jG2Re zq~^Y&*5u__f_+NlC0Py`%y>4h3tG{K`Cg2p-G^~6UxV`^4Q;dG{aKs|w#jw2sT*U^ z?i~{5Z<&~1TcGQ0@I#t9(rZVw0SnBZO^9o5a_6i6`klvl{nD3(@dfk~h$-;7kez+4r1)BsB6DyZ=NS5Q z`{?^^?F+VgG_=(wjWBPf?z@lknhwhytoNU&=i7ww{ic@ht@nMtx}_KGVIR$77Vd-N z3Re}4hrU0>dyC=lJKG+A+~#|HpQi)NseO)5j1-Zo-;L)W)B;(bymU? 
zPtLOFF@^vxc0IWr%o}=D2l~hQh>u>MTs-8dS@Y9ds;omCRl-VD^u*S_(ydW_IR<8U zrT98Q8F;H3hj(o;Ja)v@mT{$LTnpC4SthJQ>;C{d&p`f5XxB!x>rVL1Hth+-p_iba z;^XG6gW{#gA!G1zH70n^wv{h!-aiul3FQ|sZuMcEoZpRnpP&0j?^Xvuu9iNu=WBHY z;-hNECE$+tFgx0Yy>74r`>x|yW59JHbTH}W*9|{_oc81!^yL7ysjR4;W5`m^$=MvEx-WpU}e2<-bRf785<>E@6T0%oxK7*Y=$20h90tYA3zVUD0&!jRS!Y=+E>1^xku5%B}EUI@5y7if$G8914>(# zLC@bDS!?^*6f{xxz~b6B%U)r9Um{Xk{yT7 zP5GIy-7@I7O%3+J@)&3s=CahQ(bfpw)Gu(o12XYC8s6K* zaE$E_`cZYGt!(5;gQgF~B>hUa69+&bs zd#r+;vE6c7u$Otwq2cvlW@s&#?^VbTe^RXK@xf!=H0REo28|AzmR3k&n3(Q1>SR{ zV$O&8!ddXi6k3-B^R#4?;T=sLuk}j2j){B;b&RC19uDWOgp_^OzC4F+SOcZxiJAF46;NUzXm+*z^1<$jhptJw|-?Q&#_^7Uc!T z7P4@BP13Ytj7RPy@Xf#5gA_;m7_&yAJv=VKb0;ER z%XzTnJw7Fm>tG%t+d9AhnSpU3&wU9ohG_?E5AQwpX`ixMvKd!yH~1VuxdzuVg%8_F zg3=D2H}abLeB5h9{K;Y*Bix!h(Rw@8w8pM-o3-piW3+q748wELQlFg`gN-HX4)F`ksueM8bQA?X1jX+t1A zYE*H}y_8r&DL8Mp*=IUx%S@j&ZZOSr+*`V^+EZyNSXPCsX{D9bCX@Nz#2Sm1#@v#6 z%LG_`Vu`87QMtIHv}Sbivf_%;6w{QfnI>mdv8S}Mw5rxL88x(2;I4CcN=@d49{Un7 zwb!^k7Ao{O9aW1eLbCA{poIz?OKPjD@w{ub9ZM`UXWmTH_>9EFYuSsJRHC-CD!`z| zS!G|~K+ddcXH5vtDkd$YHJP%e71&IR9Uezz%_Nhfw$@SXnmjH&#Z*#S>{vE=?3ff& zO<2>fX|d>cs)s93?e?0MeV10)m+tMQ3eYNvWx_x}>hcQR}R(lI2dUF`2Ky3@OX1 z@j2omen4?`MO|eT>w}Uj9ZMmmDV0VAQi&NgruxL1dI+C(ODdUCZ^_`FREDC^xpJ9qfDh$C3wInx_0#7xPtRC&I?T7fPd+^i1R#7t|@TB zcVH=(kDrPW>K7xjdwtZ~rTJF7Hfl)UXZuakx5eBM_gVj{ftLrZHbf11WT?To#WZ5r zGsDe^+eY4Oe#&A@emq5g%R{5Dq*jgjbnL8*m&YYeSU2(ON%L;qJ9%Jct@YikkyBSp zduRI4AC_nD$@w$Tjw{Hx_ZUh(#dr1~}WXX+<;o4hBzH?5q%^5-j0 zuGBV6XjstjP{T6~uQznT7(n2qh}>R(=V*UC*_(3r&iHgz-~8JP9=hY$ypcr@-2LS}#f~$@o`q4)J+AVq(Qb8Z`@Nqm z`%^tjAn1@v(B?=%wUVH@a8{Xp1*Pf*CB+F!87OERzIMnSDyYOH$cIV6o{@siBJh|; z30j;gsCBF$!+1d}Cn0~bAV-#<)6)c%Bj|pY2i~&pnq0c~H>1O@iVd7F6?ypbs|+%6UT2+bx3TJT2(I zwhL~{U-#=vx0v4m7v}i z1l4~Nv__(r@df9PF?s8RlP$rA25tC=bTER$TzrJtn2hgwQ;D`@V6h2{K}!+PW*}%q z=McR$lc?MVQ=Uh(=0~vW`9#$vMDLannJS4&-9$gHBRaH<=&YBhXB9p*eUPYE6VW&8 zh(7ZZ9rzjeK2CJc&xvl@O7#9yM60$Djo3-_+H?4J_SZxoy-0M|%S6XsC9<{>ZF>{n z&bHy(U`%SCdza|aF?^GHg2?kB(bK<2(D;Pt!fB$|KVfmWo#+RhMAN?{$~i}L`+1_< 
zF5%nRZ-@-tkR3K4@C#={5S_4ANYQwa*aG~mQ~FWkqmL1tz7R==HvNF>VNFH;#yA~U zKKROgnlC7iz%>FaBJe&W0&y~;9XWR*lTZF;oDWWi9_$aIXaJtiPoT)`Nb0p0A3CIH zNOeU+A{Dy1JBq}hexzC-O`={;bY4$l13rBFJeJg-_Xn;6NX!^O5w(MWPdur*;wj?p zp%gjCNa_zE2x2tL7CyoVgb@fM5Jn)3Kp25A0$~Kg2!s&`BM?R)j6fKH|L+lqZj@Ib zD7rDQrV-s3SgVX~3@jc*HwON1b#x<^C-`WTf5Sgv1i}b}5eOp?Mj(tp7=bVXVFbbm zgb@fM5Jn)3Kp25A0$~Kg2!s&`BM?R)j6fKHFalu&!U%*B2qO?iAdEm5fiMDL1i}b} z5eOp?Mj(tp7=bVXVFbbmgb@fM5Jn)3Kp25A0$~Kg2!s&`BM?R)j6fKHFalu&!U%*B z2qO?iAdJBO8UiM8nuD_fC-yH4>_&(yAEBEEVvE76hn%)zHvsF^Ecrfx4FtppoY^?5 zaIVC;9_JINXUAW&FLTw@y6hzm54L^f3~W+c?BZt_f^SyMZK?g5d7 zoW1<>G4{q(rKM8c7+OxMDq38 zsj6Nwh8`8%#94Kfv_n9q4D9Q8yK^ZGD|A%lP@02gmMvRcT}#Ck>#y`&7#Izx0p_f1-UctyN*$TiyW}e{2~Q zhoE*NHmOu$&tZC5RZ_Z$_9K785Pw zF%v3$j2&`iUJt67SF;+eABGC+k?5L<3eV5L&jJ24ZTg@9M)9+OtM@f)4 zrD9EXt5}u4P|JHZ)9>)8cKl(X2<#C}zs1wa?4?Ufi|cAj?Pb_37|47lu)#B(S65;a zW%^aHAGB-Qw0}{p7mJUGCq&dyu}8I2^_$2p)#qYqi#pnDYK#~%ZCk`+>Zp&!9$_}0 zQJq)&#QmxnQE}06Gwz%=BQ82%u!T?=ww$F@Y(UC;Z@7XqHhpJ$l|sLn7@!|v8bU0_ zzN}pSj6!=-0`w~ieO{rDD|8oB!Sy>8`pPW<8YJ~Y!5(yEE@!crUaQbIDRjL;L*;Tg z?-R@Q$13#uN;&@Ck6iv2L!f-RLZ1gCu0Knm)3L`a(|0O#^vD3cK%udBsa)<+==G)m zjjd#5{(BXAwL*W4Jx-b5T7^Eqz6a@z3QbCRi$b3=1Hv~7U7=eP`zQ?3*dvz9ixv8ea=%NVEBXY=mnig7>>11TS1a^1>?zB1vqBF-6Pd=g z!?HXdEBby?p}Q1){Ys%z6?(ryTNV00vj}88^_AZ=i3#fyVy%*YRU}hTElaILG6hfO8B^&gZ>;Z!9yB9i9VDS_vu& zh0www<{E`Kg?h3d=Y)ejm7HLgBq!J_1ruZBRKVGDo}5xb4$~EZsuk6X5N_m*>P3Nj zl&HXYi#ejm2_>jZyN(#rFclB-=Je1>lwVcyI?y|EF#1CdClN?5_64?4Z zgg^|sTEa0Vlu!baU5*F;mXVCzUNj>u)lpOIbjp&>$t#{y10gs&oq_SBjZu_D9D**FU*=^&kxBf%nIbq4&;sJdUIya8v{ehn##39@@LHc;TZeO ztXYM@{Gy4d9Wb%!8L%%{pK^TjNz?)Kq|G3?+wu{IJnmf&kqVtzC2y0x2>I;t`9Rcd zvYJ%b`ublwhVN9W3DzF}2?u)({|N`W)zkINTTy4)yGmvadG8m0ezyCo*?-o}*)r&b zx?_K=D{8**>-xx-ZXH%~$Fr&_KbgGfzOm=G{PduUdi1^^xF_&ugosm8D zd+L`z@Vi%=v)80N6MJx3$B3qSt9s?mPy5_k_|53`yR|caV%PuvuyaM!%`de6v-|(2 s<;OnU_qy?o3A=W^F!QE+m$hl*3NkFWM^3MJ?$J+Pd#Uo~k7U^X8+AdC-~a#s literal 0 HcmV?d00001 diff --git a/examples/waveform-synth/waveform_synth.cpp 
b/examples/waveform-synth/waveform_synth.cpp new file mode 100644 index 0000000..988433e --- /dev/null +++ b/examples/waveform-synth/waveform_synth.cpp @@ -0,0 +1,226 @@
+/**
+ * waveform_synth.cpp — Waveform generation with ASCII art and CSV output
+ *
+ * Default (no args): ASCII art rendering of all waveforms + ADSR demo
+ * --csv flag: CSV output (columns: sample, sqr, tri, saw, pwm, sin, noise,
+ *             envelope, combined)
+ *
+ * Build: make                   (in this directory)
+ *        make ex-waveform       (from the repository root)
+ * Run:   ./waveform_synth       (ASCII art)
+ *        ./waveform_synth --csv (CSV output)
+ *
+ * Copyright (C) 2001-2026 M. A. Chatterjee — zlib license (see FR_math.h)
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "FR_defs.h"
+#include "FR_math.h"
+
+#define NUM_SAMPLES 256
+#define ROWS 21
+#define COLS 64
+#define BAM_INC (65536 / NUM_SAMPLES) /* one full cycle in 256 samples */
+#define PWM_DUTY 49152 /* 75% duty = 49152/65536 */
+
+/* Saturate a 32-bit value to the symmetric s16 range [-32767, +32767]. */
+static s16 demo_sat_s16(s32 v)
+{
+    if (v > 32767) return (s16)32767;
+    if (v < -32767) return (s16)-32767;
+    return (s16)v;
+}
+
+/* Map s16 [-32767, +32767] to row [0, ROWS-1]. Top row = +32767. */
+static int val_to_row(s16 v)
+{
+    int row = (int)(ROWS - 1) - (int)(((long)v + 32767L) * (ROWS - 1) / 65534L);
+    if (row < 0) row = 0;
+    if (row >= ROWS) row = ROWS - 1;
+    return row;
+}
+
+/*
+ * Subsample: map column [0, COLS-1] to an index in an n-sample buffer so the
+ * COLS plotted points are spread evenly over the whole buffer. n defaults to
+ * NUM_SAMPLES so one-argument calls behave as before.
+ */
+static int sample_index(int col, int n = NUM_SAMPLES)
+{
+    return (col * n) / COLS;
+}
+
+/* Print one ASCII waveform (n bipolar s16 samples, subsampled to COLS columns) */
+static void ascii_wave(const char *title, s16 *buf, int n)
+{
+    printf("\n %s (%d samples, showing %d columns)\n", title, n, COLS);
+    if (n <= 0) return; /* nothing to plot; also avoids indexing buf[-1] below */
+
+    /* Build a character grid */
+    char grid[ROWS][COLS + 1];
+    for (int r = 0; r < ROWS; r++) {
+        for (int c = 0; c < COLS; c++)
+            grid[r][c] = ' ';
+        grid[r][COLS] = '\0';
+    }
+
+    /* Place asterisks: one sample per column, spread across all n samples */
+    for (int c = 0; c < COLS; c++) {
+        int idx = sample_index(c, n);
+        if (idx >= n) idx = n - 1;
+        int r = val_to_row(buf[idx]);
+        grid[r][c] = '*';
+    }
+
+    /* Draw with axis labels */
+    for (int r = 0; r < ROWS; r++) {
+        const char *label = "";
+        if (r == 0) label = "+max";
+        else if (r == ROWS / 2) label = " 0";
+        else if (r == ROWS - 1) label = "-max";
+        printf(" %5s |%s|\n", label, grid[r]);
+    }
+}
+
+/* Print ASCII for ADSR envelope (0..32767 unipolar) */
+static void ascii_envelope(const char *title, s16 *buf, int n)
+{
+    printf("\n %s (%d samples, showing %d columns)\n", title, n, COLS);
+    if (n <= 0) return; /* nothing to plot; also avoids indexing buf[-1] below */
+
+    char grid[ROWS][COLS + 1];
+    for (int r = 0; r < ROWS; r++) {
+        for (int c = 0; c < COLS; c++)
+            grid[r][c] = ' ';
+        grid[r][COLS] = '\0';
+    }
+
+    for (int c = 0; c < COLS; c++) {
+        int idx = sample_index(c, n);
+        if (idx >= n) idx = n - 1;
+        /* Envelope is 0..32767; scale to full grid: treat 0 as -32767 for display */
+        s16 v = (s16)(buf[idx] * 2 - 32767);
+        int r = val_to_row(v);
+        grid[r][c] = '*';
+    }
+
+    for (int r = 0; r < ROWS; r++) {
+        const char *label = "";
+        if (r == 0) label = " 1.0";
+        else if (r == ROWS / 2) label = " 0.5";
+        else if (r == ROWS - 1) label = " 0.0";
+        printf(" %5s |%s|\n", label, grid[r]);
+    }
+}
+
+/* ================================================================== */
+int main(int argc, char *argv[])
+{
+    int csv_mode = 0;
+    for (int i = 1; i < argc; i++) {
+        if (strcmp(argv[i], "--csv") == 0) csv_mode = 1;
+    }
+
+    /* Generate waveform buffers */
+    s16 buf_sqr[NUM_SAMPLES];
+    s16 buf_tri[NUM_SAMPLES];
+    s16 buf_saw[NUM_SAMPLES];
+    s16 buf_pwm[NUM_SAMPLES];
+    s16 buf_sin[NUM_SAMPLES];
+    s16 buf_noise[NUM_SAMPLES];
+
+    u32 noise_state = 0xDEADBEEF;
+    u16 phase = 0;
+
+    for (int i = 0; i < NUM_SAMPLES; i++) {
+        buf_sqr[i] = fr_wave_sqr(phase);
+        buf_tri[i] = fr_wave_tri(phase);
+        buf_saw[i] = fr_wave_saw(phase);
+        buf_pwm[i] = fr_wave_pwm(phase, PWM_DUTY);
+        /* s15.16 -> s15 approx. fr_sin_bam is exactly +65536 at 90 degrees,
+         * and 65536 >> 1 = 32768 does not fit in s16, so saturate rather
+         * than let the positive peak wrap to -32768. */
+        buf_sin[i] = demo_sat_s16(fr_sin_bam(phase) >> 1);
+        buf_noise[i] = fr_wave_noise(&noise_state);
+        phase += BAM_INC;
+    }
+
+    /* ADSR envelope demo */
+    #define ENV_TOTAL 512
+    s16 buf_env[ENV_TOTAL];
+
+    fr_adsr_t env;
+    fr_adsr_init(&env,
+        64, /* attack samples */
+        32, /* decay samples */
+        16384, /* sustain level s0.15 (50%) */
+        64 /* release samples */
+    );
+
+    fr_adsr_trigger(&env);
+    int release_at = 256;
+    for (int i = 0; i < ENV_TOTAL; i++) {
+        if (i == release_at)
+            fr_adsr_release(&env);
+        buf_env[i] = fr_adsr_step(&env);
+    }
+
+    /* Combined: sin * envelope (amplitude modulation) */
+    #define COMBINED_LEN ENV_TOTAL
+    s16 buf_combined[COMBINED_LEN];
+    phase = 0;
+    for (int i = 0; i < COMBINED_LEN; i++) {
+        s32 sin_val = fr_sin_bam(phase); /* s15.16 */
+        s32 env_val = (s32)buf_env[i]; /* 0..32767 (s0.15) */
+        /* Multiply: (s15.16 * s0.15) >> 15 = s15.16, then >> 1 for s15 */
+        /* |65536 * 32767| < 2^31, so the product fits in s32 before the shift */
+        s32 combined = (sin_val * env_val) >> 16;
+        buf_combined[i] = demo_sat_s16(combined);
+        phase += BAM_INC;
+    }
+
+    if (csv_mode) {
+        /* CSV header */
+        printf("sample,sqr,tri,saw,pwm,sin,noise,envelope,combined\n");
+
+        /* Waveform buffers hold one cycle; rows past NUM_SAMPLES print 0 */
+        int max_len = COMBINED_LEN;
+        for (int i = 0; i < max_len; i++) {
+            printf("%d", i);
+            printf(",%d", i < NUM_SAMPLES ? buf_sqr[i] : 0);
+            printf(",%d", i < NUM_SAMPLES ? buf_tri[i] : 0);
+            printf(",%d", i < NUM_SAMPLES ? buf_saw[i] : 0);
+            printf(",%d", i < NUM_SAMPLES ? buf_pwm[i] : 0);
+            printf(",%d", i < NUM_SAMPLES ? buf_sin[i] : 0);
+            printf(",%d", i < NUM_SAMPLES ? buf_noise[i] : 0);
+            printf(",%d", buf_env[i]);
+            printf(",%d", buf_combined[i]);
+            printf("\n");
+        }
+    } else {
+        printf("FR_Math — Waveform Synth Demo (v%s)\n", FR_MATH_VERSION);
+        printf(" %d samples/cycle, BAM increment = %d\n", NUM_SAMPLES, BAM_INC);
+
+        ascii_wave("Square (fr_wave_sqr)", buf_sqr, NUM_SAMPLES);
+        ascii_wave("Triangle (fr_wave_tri)", buf_tri, NUM_SAMPLES);
+        ascii_wave("Sawtooth (fr_wave_saw)", buf_saw, NUM_SAMPLES);
+        /* Titles are printed through "%s", so a literal '%' must not be doubled */
+        ascii_wave("PWM 75% (fr_wave_pwm)", buf_pwm, NUM_SAMPLES);
+        ascii_wave("Sine (fr_sin_bam)", buf_sin, NUM_SAMPLES);
+        ascii_wave("Noise (fr_wave_noise)", buf_noise, NUM_SAMPLES);
+
+        printf("\n ADSR params: attack=64, decay=32, sustain=50%%, release=64\n");
+        printf(" Trigger at sample 0, release at sample %d, total %d samples\n",
+            release_at, ENV_TOTAL);
+        ascii_envelope("ADSR Envelope (fr_adsr)", buf_env, ENV_TOTAL);
+
+        ascii_wave("Sin * ADSR (amplitude modulation)", buf_combined, COMBINED_LEN);
+
+        printf("\n Tip: run with --csv to get machine-readable output\n");
+    }
+
+    printf("\n--- end ---\n");
+    return 0;
+}
diff --git a/keywords.txt b/keywords.txt index f7e784e..3dc5590 100644 --- a/keywords.txt +++ b/keywords.txt @@ -35,6 +35,7 @@ FR_printNumH KEYWORD2 FR_numstr KEYWORD2 fr_cos_bam KEYWORD2 fr_sin_bam KEYWORD2 +fr_tan_bam KEYWORD2 fr_cos KEYWORD2 fr_sin KEYWORD2 fr_tan KEYWORD2 diff --git a/llms.txt b/llms.txt index 4f13ef1..5930e4d 100644 --- a/llms.txt +++ b/llms.txt @@ -164,20 +164,24 @@ s32 ex = FR_EXP(I2FR(1, R), R); // e^1 ## Building ```bash -make lib # build static library objects -make test # run all test suites -make examples # build example program -make clean # remove build artifacts +make lib # build static library objects +make test # run all test suites +make examples # build example programs +make size-report # cross-compile size report (Docker) +make size-update # size report + patch doc files +make clean # remove build artifacts ``` ## Lean build options Define before including FR_math.h to exclude
optional subsystems: +- `FR_CORE_ONLY` — shorthand for FR_NO_PRINT + FR_NO_WAVES (~1.9 KB saved) - `FR_NO_PRINT` — removes FR_printNumF/D/H and FR_numstr (~1.3 KB saved) - `FR_NO_WAVES` — removes fr_wave_*, fr_adsr_*, FR_HZ2BAM_INC (~0.6 KB saved) ## Platform support Tested on: AVR (Arduino), ARM Cortex-M0/M4, ESP32 (Xtensa), RISC-V, -x86/x64, MSP430, 68k, 8051. Code size is 4-8KB at -Os on 32-bit targets. +x86/x64, MSP430, m68k, PowerPC, MIPS32, 68HC11. +Code size is 3-9 KB at -Os on 32-bit targets (Lean to Full). diff --git a/makefile b/makefile index c3f0c74..196e127 100644 --- a/makefile +++ b/makefile @@ -36,7 +36,8 @@ help: @echo "Build targets:" @echo " all Build library and examples" @echo " lib Build library objects only" - @echo " examples Build example program" + @echo " examples Build all example programs" + @echo " run-examples Build and run all desktop examples" @echo "" @echo "Test targets:" @echo " test Run all tests" @@ -92,11 +93,38 @@ $(BUILD_DIR)/FR_math_2D.o: $(SRC_DIR)/FR_math_2D.cpp $(HEADERS) # Build examples .PHONY: examples -examples: dirs $(BUILD_DIR)/fr_example +examples: dirs $(BUILD_DIR)/fr_example ex-basics ex-logexp ex-waveform ex-trig-accuracy $(BUILD_DIR)/fr_example: $(EXAMPLE_DIR)/posix-example/FR_Math_Example1.cpp $(BUILD_DIR)/FR_math.o $(BUILD_DIR)/FR_math_2D.o $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ +# Self-contained desktop examples (each has its own Makefile) +.PHONY: ex-basics ex-logexp ex-waveform ex-trig-accuracy run-examples + +ex-basics: + @$(MAKE) -C $(EXAMPLE_DIR)/fixed-point-basics + +ex-logexp: + @$(MAKE) -C $(EXAMPLE_DIR)/log-exp-curves + +ex-waveform: + @$(MAKE) -C $(EXAMPLE_DIR)/waveform-synth + +ex-trig-accuracy: + @if [ -f compare_lfm/libfixmath/libfixmath/fix16.h ]; then \ + $(MAKE) -C $(EXAMPLE_DIR)/trig-accuracy; \ + else \ + echo "Skipping trig-accuracy (libfixmath not found)"; \ + fi + +run-examples: examples + @echo ""; echo "=== fixed-point-basics ===" ; $(MAKE) -s -C $(EXAMPLE_DIR)/fixed-point-basics run 
+ @echo ""; echo "=== log-exp-curves ===" ; $(MAKE) -s -C $(EXAMPLE_DIR)/log-exp-curves run + @echo ""; echo "=== waveform-synth ===" ; $(MAKE) -s -C $(EXAMPLE_DIR)/waveform-synth run + @if [ -f compare_lfm/libfixmath/libfixmath/fix16.h ]; then \ + echo ""; echo "=== trig-accuracy ===" ; $(MAKE) -s -C $(EXAMPLE_DIR)/trig-accuracy run; \ + fi + # Build and run tests .PHONY: test test: dirs examples test-basic test-comprehensive test-2d test-overflow test-full test-2d-complete test-tdd @@ -267,8 +295,16 @@ clean: rm -rf $(BUILD_DIR) $(COV_DIR) rm -f *.o *.gcda *.gcno *.gcov *.exe *.info +.PHONY: clean-examples +clean-examples: + @$(MAKE) -s -C $(EXAMPLE_DIR)/posix-example clean 2>/dev/null || true + @$(MAKE) -s -C $(EXAMPLE_DIR)/fixed-point-basics clean 2>/dev/null || true + @$(MAKE) -s -C $(EXAMPLE_DIR)/log-exp-curves clean 2>/dev/null || true + @$(MAKE) -s -C $(EXAMPLE_DIR)/waveform-synth clean 2>/dev/null || true + @$(MAKE) -s -C $(EXAMPLE_DIR)/trig-accuracy clean 2>/dev/null || true + .PHONY: cleanall -cleanall: clean +cleanall: clean clean-examples rm -f *~ $(SRC_DIR)/*~ $(TEST_DIR)/*~ # Basic coverage info without lcov diff --git a/pages/assets/site.js b/pages/assets/site.js index d4d0ecc..0a83734 100644 --- a/pages/assets/site.js +++ b/pages/assets/site.js @@ -4,6 +4,9 @@ into every page so there's exactly one source of truth for the site title, version, menu, and legal line. + Version is loaded from version.json (generated by sync_version.sh) + so there is no hardcoded version string in this file. + Page skeleton expected in each HTML file: @@ -16,8 +19,6 @@ ════════════════════════════════════════════════════════════════════ */ (function () { - var FR_VERSION = 'v2.0.8'; - // Detect whether we're a top-level page or inside guide/. // Works for both file:// and http(s):// because we look for the // literal "/guide/" segment anywhere in the path. 
@@ -43,7 +44,7 @@ ]; // ----------------------------------------------------------------- - // Build header + // Build header (version placeholder filled in after fetch) // ----------------------------------------------------------------- var headerEl = document.getElementById('site-header'); if (headerEl) { @@ -60,12 +61,34 @@ headerEl.innerHTML = ''; } + // ----------------------------------------------------------------- + // Load version from version.json (generated by sync_version.sh) + // Falls back gracefully: file:// or missing file just hides the tag. + // ----------------------------------------------------------------- + var versionUrl = prefix + 'version.json'; + try { + var xhr = new XMLHttpRequest(); + xhr.open('GET', versionUrl, true); + xhr.onload = function () { + if (xhr.status === 200 || xhr.status === 0) { // 0 for file:// + try { + var data = JSON.parse(xhr.responseText); + var el = document.getElementById('site-version'); + if (el && data.version) { + el.textContent = 'v' + data.version; + } + } catch (e) { /* malformed JSON — leave blank */ } + } + }; + xhr.send(); + } catch (e) { /* XMLHttpRequest blocked (e.g. file:// CORS) — leave blank */ } + // ----------------------------------------------------------------- // Build footer // ----------------------------------------------------------------- diff --git a/pages/guide/api-reference.html b/pages/guide/api-reference.html index 3e8a17f..fb67159 100644 --- a/pages/guide/api-reference.html +++ b/pages/guide/api-reference.html @@ -101,6 +101,8 @@

    Sentinel return values (FR_defs.h)

    FR_OVERFLOW_POS0x7FFFFFFF (INT32_MAX)Saturating ops when the true result exceeds +231. FR_OVERFLOW_NEG0x80000000 (INT32_MIN)Saturating ops when the true result is below −231. FR_DOMAIN_ERROR0x80000000 (INT32_MIN)Functions with an invalid input, e.g. FR_sqrt(-1), FR_log2(0), FR_asin(2.0). Shares the bit pattern of FR_OVERFLOW_NEG, so don’t mix a ≤ FR_OVERFLOW_NEG check with a domain check — test for the exact sentinel. +FR_TRIG_MAXVAL0x7FFFFFFF (INT32_MAX)Tangent saturation ceiling. Returned by fr_tan_bam, fr_tan, fr_tan_deg, and FR_TanI when the angle is near a pole (90° + k·180°). +FR_TRIG_MINVAL-FR_TRIG_MAXVALTangent saturation floor. Negative-side pole saturation. @@ -614,6 +616,7 @@

    BAM-native (the core)

    fr_cos_bams32 fr_cos_bam(u16 bam)s15.16, range [−65536, +65536]. Exact at cardinal angles. fr_sin_bams32 fr_sin_bam(u16 bam)s15.16, range [−65536, +65536]. Exact at cardinal angles. Defined as fr_cos_bam(bam − FR_BAM_QUADRANT). +fr_tan_bams32 fr_tan_bam(u16 bam)s15.16. Uses a 65-entry octant table for [0, 45°] and the reciprocal identity tan(x) = 1/tan(90°−x) for (45°, 90°). Saturates to ±FR_TRIG_MAXVAL at the poles (90°, 270°). Returns exact 0 at 0° and 180°. No 64-bit intermediates; one 32-bit division only in the >45° path. diff --git a/pages/guide/building.html b/pages/guide/building.html index e4c4944..f2f3994 100644 --- a/pages/guide/building.html +++ b/pages/guide/building.html @@ -175,7 +175,9 @@

    Cross-compilation

    Motorola 68km68k-linux-gnu-gccDocker. Motorola 68HC11m68hc11-gccDocker. PowerPCpowerpc-linux-gnu-gccDocker. +MIPS32mipsel-linux-gnu-gccDocker. Xtensa LX106 (ESP8266)xtensa-lx106-elf-gccDocker. +Xtensa LX7 (ESP32-S3)xtensa-esp-elf-gccDocker (Espressif toolchain). 8051sdccManual. @@ -194,6 +196,7 @@

    Code size (.text section, compiled with -Os)

    + @@ -202,6 +205,7 @@

    Code size (.text section, compiled with -Os)

    + diff --git a/pages/guide/examples.html b/pages/guide/examples.html index be8d55c..273fede 100644 --- a/pages/guide/examples.html +++ b/pages/guide/examples.html @@ -18,7 +18,7 @@

    Examples

    Short, runnable snippets for the most common FR_Math tasks. Each -example compiles cleanly against the v2.0.8 library with:

    +example compiles cleanly against the library with:

    cc -Isrc example.c src/FR_math.c -o example
     ./example
    @@ -209,19 +209,16 @@

    4. Logarithm, exponential, decibels

    5. Arctangent and atan2

    The inverse-trig functions in FR_Math return angles in -degrees, not radians — the output fits in -an s16 and you can feed it straight back into -FR_SinI / FR_CosI without any -conversion. This example exercises both FR_atan -(single-argument ratio) and FR_atan2 (full-circle, -two-argument).

    - -

    Caveats: FR_atan2 takes only two -arguments (y, x) and has no radix -parameter — it returns degrees in [−180, 180] as -s16. The radix argument on -FR_atan is the radix of the input ratio, -not of the output.

    +radians at a caller-chosen output radix. This +example exercises both FR_atan (single-argument +ratio) and FR_atan2 (full-circle, two-argument).

    + +

    Caveats: all inverse-trig functions take an +out_radix parameter that sets the radix of the +output. FR_atan2(y, x, out_radix) returns +radians in [−π, π] as s32 at the chosen +radix. FR_atan(input, radix, out_radix) has +separate radixes for input and output.

    #include <stdio.h>
     #include "FR_math.h"
    @@ -230,18 +227,19 @@ 

    5. Arctangent and atan2

    { const u16 r = 14; - /* atan(1) = 45 degrees */ - s16 a = FR_atan(I2FR(1, r), r); - printf("atan(1) = %d degrees (expect 45)\n", a); + /* atan(1) = pi/4 radians ≈ 0.7854 */ + s32 a = FR_atan(I2FR(1, r), r, r); + printf("atan(1) = %d (radix %d, expect ~%d)\n", + (int)a, r, (int)(12868)); /* pi/4 at r14 */ /* Full-circle atan2 */ - s16 q2 = FR_atan2(I2FR( 1, r), I2FR(-1, r)); /* 135 deg */ - s16 q3 = FR_atan2(I2FR(-1, r), I2FR(-1, r)); /* -135 deg */ - printf("atan2( 1,-1) = %d\n", q2); - printf("atan2(-1,-1) = %d\n", q3); + s32 q2 = FR_atan2(I2FR( 1, r), I2FR(-1, r), r); /* 3*pi/4 */ + s32 q3 = FR_atan2(I2FR(-1, r), I2FR(-1, r), r); /* -3*pi/4 */ + printf("atan2( 1,-1) = %d (expect ~%d)\n", (int)q2, (int)(38603)); + printf("atan2(-1,-1) = %d (expect ~%d)\n", (int)q3, (int)(-38603)); /* asin with out-of-domain input */ - s16 bad = FR_asin(I2FR(2, r), r); + s32 bad = FR_asin(I2FR(2, r), r, r); if (bad == FR_DOMAIN_ERROR) printf("asin(2) rejected, good.\n"); return 0; @@ -563,6 +561,57 @@

    11. String round-trip and radix precision

    2^−24 ≈ 6 × 10^−8, so the last digit is always uncertain.

    +

    Desktop example programs

    + +

    In addition to the inline snippets above, the examples/ directory +contains four self-contained desktop programs. Each has its own +Makefile and README.md; build artifacts stay within the +example’s directory.

    + +
    TargetLeanCoreFull
    Xtensa LX7 (ESP32-S3)2.9 KB4.2 KB5.3 KB
    Cortex-M4 (STM32)3.3 KB4.4 KB5.5 KB
    Cortex-M0 (RP2040)3.4 KB4.5 KB5.7 KB
    ARM Thumb3.4 KB4.7 KB5.9 KB
    Xtensa LX106 (ESP8266)4.2 KB5.8 KB7.3 KB
    ARM324.3 KB5.8 KB7.7 KB
    68k4.4 KB6.2 KB7.8 KB
    MIPS324.7 KB6.6 KB8.7 KB
    x86-64 (GCC)4.6 KB6.1 KB8.0 KB
    AArch64 (ARM64)4.8 KB6.6 KB8.7 KB
    x86-325.3 KB7.2 KB9.2 KB
    + + + + + + + + + + + + + + + + + + + +
    DirectoryWhat it does
    examples/fixed-point-basics/Educational walkthrough of radix interpretation, I2FR/FR2I + round-trips, FR_NUM constant construction, aligned add/sub, + multiply precision, division, saturation, and FR_printNumF + formatted output.
    examples/log-exp-curves/Sweeps FR_log2, FR_ln, FR_log10, + FR_pow2, FR_EXP, FR_POW10, and + FR_sqrt against IEEE double reference values, printing + per-point and summary error tables.
    examples/waveform-synth/Generates square, triangle, sawtooth, PWM, sine, and noise waveforms plus + an ADSR envelope and amplitude-modulated combination. Default mode renders + ASCII art; --csv mode outputs machine-readable CSV.
    examples/trig-accuracy/Head-to-head comparison of FR_Math + (FR_SinI/FR_CosI/FR_TanI) vs + libfixmath (fix16_sin/fix16_cos/fix16_tan) + vs IEEE double over 0–360 degrees. Requires libfixmath source.
    + +

    Build all from the repo root:

    + +
    make examples        # builds all desktop examples
    +make run-examples    # builds and runs 1-3, plus 4 if libfixmath present
    + +

    Or build any single example from its directory:

    + +
    cd examples/waveform-synth
    +make run             # ASCII art output
    +make run-csv         # CSV output
    +

    See also

      diff --git a/pages/index.html b/pages/index.html index d8635c6..48284c7 100644 --- a/pages/index.html +++ b/pages/index.html @@ -53,24 +53,24 @@

      Measured accuracy

      - - - - - - - - - + + + + + + + + + - - - - + + + + - + - +
      FunctionMax err (%)*Avg err (%)Note
      sin/cos (BAM)0.15260.0030fr_sin_bam/fr_cos_bam direct; 129-entry table
      sin/cos (deg)0.15260.0029FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM
      sin/cos (rad)0.18280.0033fr_sin/fr_cos via fr_rad_to_bam ±2π r16
      tan (BAM)0.58230.0008fr_tan_bam 65536-pt full; ±maxint at poles
      tan (deg)0.53110.0008fr_tan_deg ±360° s15.16 full; sat at poles
      tan (rad)0.03860.0001fr_tan ±2π r16; r24 pole bypass
      asin / acos0.77710.028065536-pt; sqrt approx near boundary
      atan20.25640.023765536x5 radii; asin/acos+hypot_fast8
      atan0.24250.015520001-pt full sweep [-10,10]; via FR_atan2
      sin/cos (BAM)0.15260.0030very fast binary angle trig
      sin/cos (deg)0.15260.0029degree input trig fns
      sin/cos (rad)0.18280.0033radian (traditional) trig
      tan (BAM)0.58230.0008binary angle tangent; ±maxint at poles
      tan (deg)0.53110.0008degree input tangent; saturated at poles
      tan (rad)0.03860.0001radian (traditional) tangent
      asin / acos0.77710.0280inverse trig, radian output
      atan20.25640.0237inverse tangent, always safe
      atan0.24250.0155inverse tangent, accepts up to maxint
      sqrt0.00000.0000Round-to-nearest
      log20.01160.001665-entry mantissa table
      pow20.00180.000465-entry fraction table
      ln, log100.00040.0000Via FR_MULK28 from log2
      exp0.00030.0000FR_MULK28 + FR_pow2
      log20.01160.0016shift/add only for speed
      pow20.00180.0004shift/add only for speed
      ln, log100.00040.0000shift/add only for speed
      exp0.00030.0000shift/add only for speed
      exp_fast0.00090.0001Shift-only scaling
      pow100.00050.0000FR_MULK28 + FR_pow2
      pow100.00050.0000shift/add only for speed
      pow10_fast0.00220.0002Shift-only scaling
      hypot (exact)0.00000.000064-bit intermediate
      hypot (exact)0.00000.0000Uses 64-bit intermediate
      hypot_fast8 (8-seg)0.09150.0320Shift-only, no multiply
      @@ -258,7 +258,7 @@

      History

      built for graphics transforms on 16 MHz 68k Palm Pilots (it shipped inside Trumpetsoft’s Inkstorm), then ported forward to ARM, x86, MIPS, RISC-V, and various 8/16-bit embedded -targets. v2.0.7 is the current release with a full test suite, +targets. The current release has a full test suite, bit-exact numerical specification, and CI on every push.

      License

      diff --git a/pages/releases.html b/pages/releases.html index e5ec364..3321c0c 100644 --- a/pages/releases.html +++ b/pages/releases.html @@ -5,7 +5,7 @@ Releases — FR_Math - + @@ -21,6 +21,19 @@

      Releases

      release_notes.md in the repo.

      +

      v2.0.8 — 2026

      + +

      Tangent accuracy rewrite and trig rounding fix.

      + +
        +
      • BAM-native tangent: new fr_tan_bam(u16 bam) with 65-entry octant table (130 bytes). No 64-bit math. FR_TanI, FR_Tan, fr_tan are now thin wrappers.
      • +
      • Round-to-nearest fix: radian/degree trig wrappers now round instead of truncating when converting to BAM. Peak error drops from ~1.03% to 0.16% on the radian path, matching BAM-native accuracy.
      • +
      • Conversion macro trimming: FR_DEG2BAM and FR_RAD2BAM reduced to ~18–21 bits (from ~28 bits). Verified: no measurable accuracy impact.
      • +
      • FR_TRIG_MINVAL fixed: now -FR_TRIG_MAXVAL (was -FR_TRIG_MASK)
      • +
      + +
      +

      v2.0.7 — 2026

      README restructure, accuracy table cleanup, expanded cross-compile support.

      @@ -28,10 +41,10 @@

      v2.0.7 — 2026

      • FR_CORE_ONLY convenience define — single #define strips both print helpers and wave generators
      • Accuracy table cleanup — removed LSB column (percent error is the user-facing metric)
      • -
      • New cross-compile targets — RP2040 (Cortex-M0+), STM32 (Cortex-M4), 68HC11 added to Docker build
      • -
      • Two-column size table — Core (-DFR_CORE_ONLY) vs Full for every target
      • -
      • scripts/update_sizes.sh — auto-patches size tables from build/sizes.csv
      • -
      • README reordered: accuracy table first, then function list, then size table
      • +
      • New cross-compile targets — RP2040 (Cortex-M0+), STM32 (Cortex-M4), 68HC11, MIPS32 added to Docker build
      • +
      • Three-column size table — Lean / Core / Full for every target, sorted 8-bit → 64-bit
      • +
      • scripts/crossbuild_sizes.sh — consolidated script: Docker build, CSV + markdown output, doc patching
      • +
      • README reordered and cleaned up: accuracy table first, badges as standard markdown, concise build flavor descriptions

      @@ -351,7 +364,7 @@

      Timeline

      when it was written to run 2D graphics transforms on 16 MHz 68k Palm Pilots for Trumpetsoft’s Inkstorm. It has since been ported to ARM, x86, MIPS, RISC-V, and a menagerie of 8- and -16-bit embedded targets. v2.0.7 is the current release with a +16-bit embedded targets. The current release has a full test suite, a bit-exact numerical specification, and CI on every push.

      diff --git a/pages/version.json b/pages/version.json new file mode 100644 index 0000000..f81a375 --- /dev/null +++ b/pages/version.json @@ -0,0 +1 @@ +{"version":"2.0.8","hex":"0x020008"} diff --git a/release_notes.md b/release_notes.md index f5e1ef0..5ff9659 100644 --- a/release_notes.md +++ b/release_notes.md @@ -66,16 +66,18 @@ that varies with the chosen radix. - **RP2040 (Cortex-M0+)** and **STM32 (Cortex-M4)** added as named targets in the Docker cross-build -- **68HC11** toolchain added to the Docker image -- Size table now shows two columns: **Core** (`-DFR_CORE_ONLY`) and **Full** -- `docker/build_sizes.sh` outputs `build/sizes.csv` for automated patching -- New `scripts/update_sizes.sh` auto-patches size tables into README, docs, - and HTML pages +- **68HC11** and **MIPS32** toolchains added to the Docker image +- Size table now shows three columns: **Lean**, **Core**, and **Full** +- Consolidated `scripts/crossbuild_sizes.sh` — single script runs Docker, + builds all targets, writes CSV + markdown, and patches doc files + (replaces `crossbuild-docker.sh`, `size_report.sh`, `update_sizes.sh`) +- Size table sorted by architecture width (8-bit → 64-bit) ### README restructure Sections reordered: accuracy table moved above the size table to lead with -the library's primary selling point. Size table now shows Core vs Full columns. +the library's primary selling point. Badges cleaned up from Quikdown HTML to +standard markdown syntax. Build flavor descriptions made more concise. 
--- diff --git a/scripts/crossbuild_sizes.sh b/scripts/crossbuild_sizes.sh index b32a489..d6e1f85 100755 --- a/scripts/crossbuild_sizes.sh +++ b/scripts/crossbuild_sizes.sh @@ -10,7 +10,7 @@ # Requires: docker, xelp-crossbuild:latest image # # Output files: -# build/sizes.csv — raw CSV (target,lean,core,full) +# build/sizes.csv — raw CSV (target,width,lean,core,full) # build/sizes.md — markdown table # # With --update, patches these files between sentinels: @@ -89,56 +89,47 @@ build_text_size() { build_target() { local label="$1" - local compiler="$2" - local flags="$3" + local width="$2" + local compiler="$3" + local flags="$4" local lean_sz=$(build_text_size "$compiler" "$flags" "$LEAN_DEFS") local core_sz=$(build_text_size "$compiler" "$flags" "$CORE_DEFS") local full_sz=$(build_text_size "$compiler" "$flags" "$FULL_DEFS") - echo "${label},${lean_sz},${core_sz},${full_sz}" >> "$CSV" + echo "${label},${width},${lean_sz},${core_sz},${full_sz}" >> "$CSV" } # Write CSV header -echo "target,lean,core,full" > "$CSV" - -# --- x86 --- -build_target "GCC x86-64" "gcc" "" -build_target "Clang x86-64" "clang" "" -build_target "GCC x86-32" "gcc" "-m32" -build_target "TCC x86" "tcc" "" - -# --- ARM --- -build_target "GCC AArch64 (ARM64)" "aarch64-linux-gnu-gcc" "" -build_target "GCC ARM32" "arm-none-eabi-gcc" "" -build_target "GCC ARM Thumb" "arm-none-eabi-gcc" "-mthumb" -build_target "Cortex-M0 (RP2040)" "arm-none-eabi-gcc" "-mcpu=cortex-m0 -mthumb" -build_target "Cortex-M4 (STM32)" "arm-none-eabi-gcc" "-mcpu=cortex-m4 -mthumb" - -# --- Bare-metal: no stdint.h in sysroot --- -NOSTD="-DFR_NO_STDINT" - -# --- MSP430 --- -build_target "GCC MSP430" "msp430-gcc" "$NOSTD" - -# --- AVR --- -build_target "AVR ATmega328P" "avr-gcc" "$NOSTD -mmcu=avr5" -build_target "AVR ATtiny85" "avr-gcc" "$NOSTD -mmcu=attiny85" - -# --- 68HC11 --- -build_target "GCC 68HC11" "m68hc11-gcc" "$NOSTD" - -# --- 68k --- -build_target "GCC m68k" "m68k-linux-gnu-gcc" "" +echo "target,width,lean,core,full" 
> "$CSV" -# --- PowerPC --- -build_target "GCC PowerPC" "powerpc-linux-gnu-gcc" "" - -# --- RISC-V --- -build_target "RISC-V rv64" "riscv64-linux-gnu-gcc" "" -build_target "RISC-V rv32" "riscv64-unknown-elf-gcc" "$NOSTD -march=rv32imac -mabi=ilp32" - -# --- Xtensa --- -build_target "Xtensa LX106 (ESP8266)" "xtensa-lx106-elf-gcc" "$NOSTD" +# --- 8-bit --- +NOSTD="-DFR_NO_STDINT" +build_target "AVR ATmega328P" 8 "avr-gcc" "$NOSTD -mmcu=avr5" +build_target "AVR ATtiny85" 8 "avr-gcc" "$NOSTD -mmcu=attiny85" +build_target "68HC11" 8 "m68hc11-gcc" "$NOSTD" + +# --- 16-bit --- +build_target "MSP430" 16 "msp430-gcc" "$NOSTD" + +# --- 32-bit --- +build_target "Cortex-M0 (RP2040)" 32 "arm-none-eabi-gcc" "-mcpu=cortex-m0 -mthumb" +build_target "Cortex-M4 (STM32)" 32 "arm-none-eabi-gcc" "-mcpu=cortex-m4 -mthumb" +build_target "ARM32" 32 "arm-none-eabi-gcc" "" +build_target "ARM Thumb" 32 "arm-none-eabi-gcc" "-mthumb" +build_target "RISC-V rv32" 32 "riscv64-unknown-elf-gcc" "$NOSTD -march=rv32imac -mabi=ilp32" +build_target "Xtensa LX106 (ESP8266)" 32 "xtensa-lx106-elf-gcc" "$NOSTD" +build_target "Xtensa LX7 (ESP32-S3)" 32 "xtensa-esp-elf-gcc" "" +build_target "m68k" 32 "m68k-linux-gnu-gcc" "" +build_target "PowerPC" 32 "powerpc-linux-gnu-gcc" "" +build_target "MIPS32" 32 "mipsel-linux-gnu-gcc" "" +build_target "x86-32" 32 "gcc" "-m32" +build_target "TCC x86" 32 "tcc" "" + +# --- 64-bit --- +build_target "RISC-V rv64" 64 "riscv64-linux-gnu-gcc" "" +build_target "x86-64 (GCC)" 64 "gcc" "" +build_target "x86-64 (Clang)" 64 "clang" "" +build_target "AArch64 (ARM64)" 64 "aarch64-linux-gnu-gcc" "" echo "Docker build complete — $(grep -c , "$CSV") rows written to build/sizes.csv" ' @@ -154,8 +145,8 @@ if [ ! 
-f "${CSV}" ]; then exit 1 fi -# Sort by full size ascending (skip header) -SORTED=$(tail -n +2 "${CSV}" | sort -t',' -k4,4n) +# Sort by width ascending, then full size ascending (skip header) +SORTED=$(tail -n +2 "${CSV}" | sort -t',' -k2,2n -k5,5n) if [ -z "${SORTED}" ]; then echo "ERROR: No data rows in ${CSV}" >&2 @@ -178,30 +169,35 @@ fmt_kb() { echo "" echo "============================================================" echo "FR_math.c code size (.text bytes), compiled with -Os" +echo "Sorted by architecture width (8-bit → 64-bit)" echo "============================================================" echo "" -printf " %-28s %8s %8s %8s\n" "Target" "Lean" "Core" "Full" -printf " %-28s %8s %8s %8s\n" "----------------------------" "--------" "--------" "--------" -while IFS=',' read -r target lean core full; do - printf " %-28s %8s %8s %8s\n" "$target" "$lean" "$core" "$full" +printf " %-28s %5s %8s %8s %8s\n" "Target" "Width" "Lean" "Core" "Full" +printf " %-28s %5s %8s %8s %8s\n" "----------------------------" "-----" "--------" "--------" "--------" +while IFS=',' read -r target width lean core full; do + printf " %-28s %4s-b %8s %8s %8s\n" "$target" "$width" "$lean" "$core" "$full" done <<< "${SORTED}" echo "" -echo "Lean = -DFR_LEAN -DFR_NO_PRINT (radian trig, inv trig, log/exp, sqrt)" -echo "Core = -DFR_CORE_ONLY (+ degree trig, BAM tan, log10, hypot)" -echo "Full = all features (+ print, waves, ADSR)" +echo "Lean = -DFR_LEAN -DFR_NO_PRINT (radian trig, inv trig, log/exp, sqrt)" +echo "Core = -DFR_CORE_ONLY (Lean + degree/BAM trig, log10, hypot)" +echo "Full = default (Core + print, waves, ADSR)" echo "" # --- build/sizes.md --- { echo "# FR_math.c Code Sizes (.text bytes, -Os)" echo "" + echo "Sorted by architecture width (8-bit → 64-bit)." 
+ echo "" echo "| Target | Lean | Core | Full |" echo "|--------|-----:|-----:|-----:|" - while IFS=',' read -r target lean core full; do + while IFS=',' read -r target width lean core full; do printf "| %s | %s | %s | %s |\n" "$target" "$(fmt_kb "$lean")" "$(fmt_kb "$core")" "$(fmt_kb "$full")" done <<< "${SORTED}" echo "" - echo "Lean = \`-DFR_LEAN -DFR_NO_PRINT\` | Core = \`-DFR_CORE_ONLY\` | Full = all features" + echo "**Lean** (\`-DFR_LEAN -DFR_NO_PRINT\`): radian trig, inv trig, log/exp, sqrt." + echo "**Core** (\`-DFR_CORE_ONLY\`): Lean + degree/BAM trig, log10, hypot." + echo "**Full** (default): Core + formatted print, wave generators, ADSR envelope." } > build/sizes.md echo "Wrote build/sizes.csv and build/sizes.md" @@ -214,9 +210,9 @@ fi # 4. Patch doc files # ----------------------------------------------------------------------- -# Build markdown replacement block +# Build markdown replacement block (width column is for sorting only, omit from output) MD_ROWS="" -while IFS=',' read -r target lean core full; do +while IFS=',' read -r target width lean core full; do row="| ${target} | $(fmt_kb "${lean}") | $(fmt_kb "${core}") | $(fmt_kb "${full}") |" if [ -n "${MD_ROWS}" ]; then MD_ROWS+=$'\n' @@ -254,9 +250,9 @@ patch_html() { return fi - # Build HTML rows + # Build HTML rows (skip width column) local html_rows="" - while IFS=',' read -r target lean core full; do + while IFS=',' read -r target width lean core full; do local tr="${target}$(fmt_kb "${lean}")$(fmt_kb "${core}")$(fmt_kb "${full}")" if [ -n "$html_rows" ]; then html_rows+=$'\n' diff --git a/scripts/sync_version.sh b/scripts/sync_version.sh index 4a7f763..2a33525 100755 --- a/scripts/sync_version.sh +++ b/scripts/sync_version.sh @@ -18,8 +18,7 @@ # src/FR_math.h — FR_MATH_VERSION string (derived from _HEX) # VERSION — plain-text "X.Y.Z" (derived from _HEX) # README.md — shields.io version badge -# README.md — "Current version:" line -# pages/assets/site.js — FR_VERSION constant (docs page 
header) +# pages/version.json — {"version":"X.Y.Z","hex":"0xMMmmpp"} for site.js # src/FR_math_2D.h — @version doxygen tag # src/FR_math_2D.cpp — @version doxygen tag # library.properties — Arduino Library Manager version @@ -196,20 +195,29 @@ update_file "README.md version badge" "${PROJECT_ROOT}/README.md" \ "s|(img\\.shields\\.io/badge/version-)[0-9]+\\.[0-9]+\\.[0-9]+(-[a-z]+\\.svg)|\${1}${VERSION}\${2}|g" # -------------------------------------------------------------------------- -# 4. README.md — "Current version: X.Y.Z" line in the Version section +# 4. pages/version.json — machine-readable version for site.js +# site.js fetches this at runtime so no hardcoded version in JS. # -------------------------------------------------------------------------- -update_file "README.md Current version: line" "${PROJECT_ROOT}/README.md" \ - "s|(Current version: )[0-9]+\\.[0-9]+\\.[0-9]+|\${1}${VERSION}|g" - -# -------------------------------------------------------------------------- -# 5. pages/assets/site.js — FR_VERSION constant -# Pattern: var FR_VERSION = 'v2.0.0'; -# -------------------------------------------------------------------------- -update_file "pages/assets/site.js FR_VERSION" "${PROJECT_ROOT}/pages/assets/site.js" \ - "s|(var FR_VERSION = 'v)[0-9]+\\.[0-9]+\\.[0-9]+(';)|\${1}${VERSION}\${2}|g" +VER_JSON="${PROJECT_ROOT}/pages/version.json" +VER_JSON_WANT="{\"version\":\"${VERSION}\",\"hex\":\"${WANT_HEX}\"}" +VER_JSON_CUR="" +if [[ -f "${VER_JSON}" ]]; then + VER_JSON_CUR=$(cat "${VER_JSON}" | tr -d '[:space:]') +fi +VER_JSON_WANT_TRIMMED=$(echo "${VER_JSON_WANT}" | tr -d '[:space:]') +if [[ "${VER_JSON_CUR}" == "${VER_JSON_WANT_TRIMMED}" ]]; then + echo -e " ${GREEN}ok ${NC} pages/version.json" +elif [[ "${MODE}" == "check" ]]; then + echo -e " ${RED}DRIFT${NC} pages/version.json" + DRIFT=1 +else + echo "${VER_JSON_WANT}" > "${VER_JSON}" + echo -e " ${YELLOW}updated${NC} pages/version.json" + CHANGED=1 +fi # 
-------------------------------------------------------------------------- -# 6. src/FR_math_2D.h — @version doxygen tag +# 5. src/FR_math_2D.h — @version doxygen tag # -------------------------------------------------------------------------- update_file "src/FR_math_2D.h @version" "${PROJECT_ROOT}/src/FR_math_2D.h" \ "s|(\\@version )[0-9]+\\.[0-9]+\\.[0-9]+|\${1}${VERSION}|g" diff --git a/tests/test_tdd.cpp b/tests/test_tdd.cpp index da66323..4bff9b2 100644 --- a/tests/test_tdd.cpp +++ b/tests/test_tdd.cpp @@ -1931,7 +1931,7 @@ static void section_accuracy_table(void) { stats_add(&st, (double)bam, frd(fr_sin_bam(bam), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0); stats_add(&st, (double)bam, frd(fr_cos_bam(bam), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0); } - acc_row("sin/cos (BAM)", &st, "fr_sin_bam/fr_cos_bam direct; 129-entry table"); + acc_row("sin/cos (BAM)", &st, "very fast binary angle trig"); } /* --- sin / cos (degree wrappers: 65536-pt at s15.16) --- */ @@ -1954,7 +1954,7 @@ static void section_accuracy_table(void) { stats_add(&st, d, frd(FR_SinI(d), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0); stats_add(&st, d, frd(FR_CosI(d), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0); } - acc_row("sin/cos (deg)", &st, "FR_Sin/FR_Cos ±360° s15.16; FR_DEG2BAM"); + acc_row("sin/cos (deg)", &st, "degree input trig fns"); } /* --- sin / cos (radian wrappers: 65536-pt) --- */ @@ -1967,7 +1967,7 @@ static void section_accuracy_table(void) { stats_add(&st, actual_angle, frd(fr_sin(rad_fp, 16), FR_TRIG_OUT_PREC), q16(sin(actual_angle)), 1.0); stats_add(&st, actual_angle, frd(fr_cos(rad_fp, 16), FR_TRIG_OUT_PREC), q16(cos(actual_angle)), 1.0); } - acc_row("sin/cos (rad)", &st, "fr_sin/fr_cos via fr_rad_to_bam ±2π r16"); + acc_row("sin/cos (rad)", &st, "radian (traditional) trig"); } /* --- tan (BAM native: 65536-pt, full sweep) --- */ @@ -1981,7 +1981,7 @@ static void section_accuracy_table(void) { else ref = tan_ref(bam * 2.0 * M_PI / 65536.0); stats_add(&st, (double)bam, frd(fr_tan_bam(bam), 
FR_TRIG_OUT_PREC), q16(ref), TAN_CLAMP); } - acc_row("tan (BAM)", &st, "fr_tan_bam 65536-pt full; ±maxint at poles"); + acc_row("tan (BAM)", &st, "binary angle tangent; ±maxint at poles"); } /* --- tan (degree wrappers: 65536-pt at s15.16, full sweep) --- */ @@ -2001,7 +2001,7 @@ static void section_accuracy_table(void) { double rad = d * M_PI / 180.0; stats_add(&st, d, frd(FR_TanI(d), FR_TRIG_OUT_PREC), q16(tan_ref(rad)), TAN_CLAMP); } - acc_row("tan (deg)", &st, "FR_Tan ±360° s15.16 full; sat at poles"); + acc_row("tan (deg)", &st, "degree input tangent; saturated at poles"); } /* --- tan (radian wrappers: 65536-pt, full sweep) --- */ @@ -2013,7 +2013,7 @@ static void section_accuracy_table(void) { double actual_angle = frd(rad_fp, 16); stats_add(&st, actual_angle, frd(fr_tan(rad_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(actual_angle)), TAN_CLAMP); } - acc_row("tan (rad)", &st, "fr_tan ±2π r16 full; sat at poles"); + acc_row("tan (rad)", &st, "radian (traditional) tangent"); } /* --- asin / acos --- */ @@ -2028,7 +2028,7 @@ static void section_accuracy_table(void) { rad = FR_acos((s32)i, 15, R); stats_add(&st, xd, frd(rad, R), q16(acos(xd)), M_PI); } - acc_row("asin / acos", &st, "65536-pt; sqrt approx near boundary"); + acc_row("asin / acos", &st, "reverse trig, radian output"); } /* --- atan2 --- */ @@ -2073,7 +2073,7 @@ static void section_accuracy_table(void) { s32 r = FR_atan2(fy, fx, R); stats_add(&st, specials_deg[si], frd(r, R), q16(atan2((double)fy, (double)fx)), M_PI); } - acc_row("atan2", &st, "65536x5 radii; asin/acos+hypot_fast8"); + acc_row("atan2", &st, "reverse tangent, always safe"); } /* --- atan --- */ @@ -2087,7 +2087,7 @@ static void section_accuracy_table(void) { double ref = atan(actual_x); stats_add(&st, actual_x, frd(r, R), q16(ref), M_PI / 2.0); } - acc_row("atan", &st, "20001-pt full sweep [-10,10]; via FR_atan2"); + acc_row("atan", &st, "reverse tangent, accepts up to maxint"); } /* --- sqrt --- */ @@ -2131,7 +2131,7 @@ static void 
section_accuracy_table(void) { s32 r = FR_log2(fr, (u16)R, (u16)R); stats_add(&st, actual_x, frd(r, R), q16(log2(actual_x)), log2(32000.0)); } - acc_row("log2", &st, "65-entry mantissa table"); + acc_row("log2", &st, "shift/add only for speed"); } /* --- pow2 --- */ @@ -2145,7 +2145,7 @@ static void section_accuracy_table(void) { double ref = pow(2.0, actual_x); stats_add(&st, actual_x, frd(r, R), q16(ref), pow(2.0, 8.0)); } - acc_row("pow2", &st, "65-entry fraction table"); + acc_row("pow2", &st, "shift/add only for speed"); } /* --- ln, log10 --- */ @@ -2163,7 +2163,7 @@ static void section_accuracy_table(void) { ref = log10(actual_x); stats_add(&st, actual_x, frd(r, R), q16(ref), log10(32000.0)); } - acc_row("ln, log10", &st, "Via FR_MULK28 from log2"); + acc_row("ln, log10", &st, "shift/add only for speed"); } /* --- exp (FR_EXP) --- */ @@ -2178,7 +2178,7 @@ static void section_accuracy_table(void) { if (ref > 32000.0 || ref < 1e-6) continue; /* skip overflow/underflow */ stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0); } - acc_row("exp", &st, "FR_MULK28 + FR_pow2"); + acc_row("exp", &st, "shift/add only for speed"); } /* --- exp_fast (FR_EXP_FAST) --- */ @@ -2208,7 +2208,7 @@ static void section_accuracy_table(void) { if (ref > 32000.0 || ref < 1e-6) continue; stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0); } - acc_row("pow10", &st, "FR_MULK28 + FR_pow2"); + acc_row("pow10", &st, "shift/add only for speed"); } /* --- pow10_fast (FR_POW10_FAST) --- */ @@ -2241,7 +2241,7 @@ static void section_accuracy_table(void) { double ref = hypot(actual_x, actual_y); stats_add(&st, ref, frd(r, R), q16(ref), hypot(1000.0, 1000.0)); } - acc_row("hypot (exact)", &st, "64-bit intermediate"); + acc_row("hypot (exact)", &st, "Uses 64-bit intermediate"); } /* --- hypot_fast8 (8-seg) --- */ diff --git a/tools/make_release.sh b/tools/make_release.sh index 28f7647..0ef8adb 100755 --- a/tools/make_release.sh +++ b/tools/make_release.sh @@ -112,7 +112,10 @@ 
do_sync_version() { echo "" echo " Running sync_version.sh to fix drift..." bash "${PROJECT_ROOT}/scripts/sync_version.sh" - git add -A + # Stage only the files sync_version.sh touches (not the whole tree). + git add src/FR_math.h VERSION README.md pages/version.json \ + src/FR_math_2D.h src/FR_math_2D.cpp \ + library.properties library.json idf_component.yml llms.txt pass "Version synced to $VER_STRING (changes staged)" else pass "All version strings match $VER_STRING" @@ -149,7 +152,7 @@ do_validate() { grep -E "Failed: [1-9]" "${test_log}" fail "Test failures detected" fi - TOTAL_PASSED=$(grep -Eo "Passed: [0-9]+" "${test_log}" | awk -F: '{sum+=$2} END {print sum}') + TOTAL_PASSED=$(grep -Eo "Passed: [0-9]+" "${test_log}" | awk -F: '{sum+=$2} END {print sum+0}') pass "${TOTAL_PASSED} tests passed." echo "" @@ -228,7 +231,7 @@ do_cross_compile() { # Files the pipeline itself may modify (badge update, version sync). # Anything outside this list is unexpected and should block the release. -PIPELINE_FILES="README.md VERSION src/FR_math.h library.properties library.json idf_component.yml llms.txt pages/assets/site.js src/FR_math_2D.h src/FR_math_2D.cpp docs/README.md pages/index.html" +PIPELINE_FILES="README.md VERSION src/FR_math.h library.properties library.json idf_component.yml llms.txt pages/version.json src/FR_math_2D.h src/FR_math_2D.cpp" do_commit_pipeline_changes() { step_header "Commit pipeline-generated changes" @@ -634,8 +637,8 @@ do_switch_master() { do_verify_master() { step_header "Verify build on master" - run_cmd make clean >/dev/null 2>&1 - run_cmd make test >/dev/null 2>&1 + make clean >/dev/null 2>&1 + make test >/dev/null 2>&1 pass "All tests pass on master." 
} From d8c596829a8567b9d8fb288b0a06a15ba3a4dfff Mon Sep 17 00:00:00 2001 From: deftio Date: Mon, 4 May 2026 19:37:49 -0700 Subject: [PATCH 6/7] docs and examples update, src clean up, accuracy metrics and tune up --- .gitignore | 4 ++-- README.md | 4 ++-- agents.md | 2 +- dev/FR_math_TODO.md | 8 +++++--- docs/README.md | 6 +++--- docs/building.md | 28 ++++++++++++++-------------- docs/getting-started.md | 7 ++++--- docs/releases.md | 7 +++---- pages/guide/building.html | 21 ++++++++++----------- pages/guide/getting-started.html | 8 +++++--- pages/index.html | 16 ++++++++-------- pages/releases.html | 7 +++---- release_management.md | 8 ++++---- scripts/build.sh | 4 ++-- src/FR_math.h | 23 ++++++++--------------- tests/fr_math_test.c | 2 +- 16 files changed, 75 insertions(+), 80 deletions(-) diff --git a/.gitignore b/.gitignore index 9be713d..10a7a12 100644 --- a/.gitignore +++ b/.gitignore @@ -56,8 +56,8 @@ htmlcov/ .idea/ *.sublime-* -# Claude Code local files -CLAUDE.local.md +# Claude (Anthropic) project-local files — not part of the library +CLAUDE*.md .claude/ # OS files diff --git a/README.md b/README.md index 0cf1c94..601e7a3 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ [![License](https://img.shields.io/badge/License-BSD%202--Clause-blue.svg)](https://opensource.org/licenses/BSD-2-Clause) [![CI](https://github.com/deftio/fr_math/actions/workflows/ci.yml/badge.svg)](https://github.com/deftio/fr_math/actions/workflows/ci.yml) -[![Coverage](https://img.shields.io/badge/coverage-98%25-brightgreen.svg)](#building-and-testing) +[![Coverage](https://img.shields.io/badge/coverage-99%25-brightgreen.svg)](#building-and-testing) [![Docs](https://img.shields.io/badge/docs-online-blue.svg)](https://deftio.github.io/fr_math/) [![Version](https://img.shields.io/badge/version-2.0.8-blue.svg)](release_notes.md) @@ -15,7 +15,7 @@ See: **[Documentation & Guide](https://deftio.github.io/fr_math/)** — for API **FR_Math** is a compact, integer-only fixed-point math 
library built for systems where floating point is too slow, too big, or unavailable. Designed for embedded targets ranging from legacy 16 MHz 68k processors to modern Cortex-M and RISC-V cores, it provides a full suite of math primitives — trigonometry, logarithms, roots, transforms, and signal generators — while remaining deterministic, portable, and small. -Unlike most fixed-point libraries, FR_Math lets the caller choose the binary point (raddix) per operation, trading precision and range explicitly instead of locking into a single format. FR_math is Pure C (C99/C11/C17,with) with C++ wrappers. +Unlike most fixed-point libraries, FR_Math lets the caller choose the binary point (radix) per operation, trading precision and range explicitly instead of locking into a single format. Pure C with C++ wrappers — compiles cleanly under C99, C11, C17, C++11 and later. Compiles under Arduino, PlatformIO, Espressif, many older embedded targets. Zero dependencies beyond ``. diff --git a/agents.md b/agents.md index 6bd9c32..30128db 100644 --- a/agents.md +++ b/agents.md @@ -31,7 +31,7 @@ dev/ Development notes and planning (not shipped) ```bash make lib # compile library objects -make test # run all 7 test suites (27+ tests) +make test # run full test suite (99% line coverage) make examples # build example programs make size-report # cross-compile size report (Docker) make size-update # size report + patch doc files diff --git a/dev/FR_math_TODO.md b/dev/FR_math_TODO.md index 84c2336..7d28a71 100644 --- a/dev/FR_math_TODO.md +++ b/dev/FR_math_TODO.md @@ -1,6 +1,8 @@ -# FR_Math TODO -[![Build Status](https://travis-ci.org/deftio/fr_math.svg?branch=master)](https://travis-ci.org/deftio/fr_math) -[![Coverage Status](https://coveralls.io/repos/github/deftio/fr_math/badge.svg?branch=master)](https://coveralls.io/github/deftio/fr_math?branch=master) +# FR_Math TODO + 
+[![CI](https://github.com/deftio/fr_math/actions/workflows/ci.yml/badge.svg)](https://github.com/deftio/fr_math/actions/workflows/ci.yml) + +*Historical scratchpad — not maintained as a roadmap. For current build/test, see repo root `README.md` and `agents.md`.* (c) M. A. Chatterjee 2000-2016 diff --git a/docs/README.md b/docs/README.md index 5dc8485..b0984f5 100644 --- a/docs/README.md +++ b/docs/README.md @@ -42,8 +42,8 @@ or any tooling. If you want the browser version, look in Errors below are measured at Q16.16 (s15.16). All functions accept any radix — Q16.16 is just the reference point for the table. See the -[TDD report](../build/test_tdd_report.md) for sweeps at radixes 8, 12, -16, and 24. +TDD report (run `make test-tdd` to generate `build/test_tdd_report.md`) +for sweeps at radixes 8, 12, 16, and 24. | Function | Max err (%)*| Avg err (%) | Note | @@ -227,7 +227,7 @@ Sizes measured with `arm-none-eabi-gcc -mcpu=cortex-m0 -mthumb -Os`. libfixmath covers trig/sqrt/exp in Q16.16 only; FR_Math includes log/ln/log10, wave generators, ADSR, print helpers, and variable radix. CMSIS-DSP estimate is for the math function subset only. -See [`scripts/crossbuild-docker.sh`](../scripts/crossbuild-docker.sh) for +See [`scripts/crossbuild_sizes.sh`](../scripts/crossbuild_sizes.sh) for the build script. ## History diff --git a/docs/building.md b/docs/building.md index 76eb31c..db8d192 100644 --- a/docs/building.md +++ b/docs/building.md @@ -14,9 +14,10 @@ FR_Math has no dependencies beyond a C99 compiler and - Optional: `lcov` / `gcov` for coverage reports. -There is no Autotools, no CMake, no Ninja, no package-manager -integration. The library is small enough that the Makefile fits on -one screen. +There is no Autotools, no Ninja, no package-manager integration. +The primary build system is a single Makefile. A `CMakeLists.txt` +exists for ESP-IDF integration only — it is not a general-purpose +CMake build. 
## Makefile targets @@ -90,28 +91,27 @@ See `release_management.md` for the full step-by-step reference. ## The test suite -Tests live under `tests/` and are split into six +Tests live under `tests/` and are split into seven binaries to keep compile times low: | Binary | What it checks | | --- | --- | -| `test_basic` | Radix conversions, `FR_ADD`, `FR_FixMuls`, rounding. | -| `test_trig` | Integer-degree trig (`fr_sin_deg` et al.). | -| `test_trig_radians` | Radian / BAM trig and the v2 `fr_sin` API. | -| `test_log_exp` | Log base 2 / ln / log10 and their inverses. | +| `fr_test` | Radix conversions, `FR_ADD`, `FR_FixMuls`, rounding (legacy harness). | +| `test_comprehensive` | Trig (degree, radian, BAM), log/exp, sqrt, hypot. | | `test_2d` | 2D transforms, determinants, inverses. | -| `test_full_coverage` | Dark-corner cases: overflow sentinels, edge radixes, round-trips. | +| `test_overflow` | Overflow sentinels, saturation, edge radixes. | +| `test_full` | Full-coverage dark-corner cases and round-trips. | +| `test_2d_complete` | Extended 2D: matrix composition, inverse, point transforms. | | `test_tdd` | Characterization tests pinned to bit-exact reference values. | -As of v2.0.0 the suite contains **42 tests** across -those binaries and covers **99%** of the library source. +The suite covers **99%** of the library source. Every public symbol is exercised at least once. 
### Running a single binary ```bash -make build/test_basic -./build/test_basic +make test-comprehensive +./build/test_comprehensive # or all of them at once make test @@ -227,7 +227,7 @@ To regenerate this table, run the Docker cross-build (requires the [xelp](https://github.com/deftio/xelp) Docker image): ```bash -scripts/crossbuild-docker.sh +scripts/crossbuild_sizes.sh ``` ### Example: RISC-V diff --git a/docs/getting-started.md b/docs/getting-started.md index 48028ce..2fefb6d 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -29,7 +29,7 @@ cd fr_math `build.sh` wipes `build/`, rebuilds the library, examples, and tests, and runs the full test suite. On success -the output shows 42 tests passing across six test binaries. +the output shows all tests passing (99% line coverage). ## A first program @@ -274,8 +274,9 @@ make test # build + run every test suite make coverage # coverage report (requires gcov) ``` -As of v2.0.1, FR_Math ships with 42 passing tests and 99% line -coverage across the library sources. +Run `make test` for a full pass. With `make coverage`, line coverage of +the library sources is about **99%**. See [Building & Testing](building.md) +for targets, cross-compilation, and CI. ## Next steps diff --git a/docs/releases.md b/docs/releases.md index a6072df..734eb1b 100644 --- a/docs/releases.md +++ b/docs/releases.md @@ -270,10 +270,9 @@ mandatory. ### Test suite -v2 ships with **42 tests** across six test binaries -and a characterization suite (`test_tdd.cpp`) that pins -numerical behavior to bit-exact reference values. Overall line -coverage is **99%** on the library sources. +v2 ships with a full test suite covering **99%** of library +source lines, plus a characterization suite (`test_tdd.cpp`) +that pins numerical behavior to bit-exact reference values. 
## v1.0.3 — 2025 diff --git a/pages/guide/building.html b/pages/guide/building.html index f2f3994..5ab570f 100644 --- a/pages/guide/building.html +++ b/pages/guide/building.html @@ -107,30 +107,29 @@

      tools/make_release.sh

      The test suite

      -

      Tests live under tests/ and are split into six +

      Tests live under tests/ and are split into seven binaries to keep compile times low:

      - - - - + + - + + +
      Binary | What it checks
      test_basic | Radix conversions, FR_ADD, FR_FixMuls, rounding.
      test_trig | Integer-degree trig (fr_sin_deg et al.).
      test_trig_radians | Radian / BAM trig and the v2 fr_sin API.
      test_log_exp | Log base 2 / ln / log10 and their inverses.
      fr_test | Radix conversions, FR_ADD, FR_FixMuls, rounding (legacy harness).
      test_comprehensive | Trig (degree, radian, BAM), log/exp, sqrt, hypot.
      test_2d | 2D transforms, determinants, inverses.
      test_full_coverage | Dark-corner cases: overflow sentinels, edge radixes, round-trips.
      test_overflow | Overflow sentinels, saturation, edge radixes.
      test_full | Full-coverage dark-corner cases and round-trips.
      test_2d_complete | Extended 2D: matrix composition, inverse, point transforms.
      test_tdd | Characterization tests pinned to bit-exact reference values.
      -

      As of v2.0.0 the suite contains 42 tests across -those binaries and covers 99% of the library source. +

      The suite covers 99% of the library source. Every public symbol is exercised at least once.

      Running a single binary

      -
      make build/test_basic
      -./build/test_basic
      +
      make test-comprehensive
      +./build/test_comprehensive
       
       # or all of them at once
       make test
      @@ -252,7 +251,7 @@

      Lean build options

      To regenerate this table, run the Docker cross-build (requires the xelp Docker image):

      -
      scripts/crossbuild-docker.sh
      +
      scripts/crossbuild_sizes.sh

      Example: RISC-V

      diff --git a/pages/guide/getting-started.html b/pages/guide/getting-started.html index d5d300d..e5076bf 100644 --- a/pages/guide/getting-started.html +++ b/pages/guide/getting-started.html @@ -46,7 +46,7 @@

      Install

      build.sh wipes build/, rebuilds the library, examples, and tests, and runs the full test suite. On success -the output shows 42 tests passing across six test binaries.

      +the output shows all tests passing (99% line coverage).

      A first program

      @@ -286,8 +286,10 @@

      Running the test suite

      make test           # build + run every test suite
       make coverage       # coverage report (requires gcov)
      -

      As of v2.0.1, FR_Math ships with 42 passing tests and 99% line -coverage across the library sources.

      +

      Run make test for a full pass. With make coverage, +line coverage of the library sources is about 99%. +See Building & Testing for targets, +cross-compilation, and CI.

      Next steps

      diff --git a/pages/index.html b/pages/index.html index 48284c7..9713405 100644 --- a/pages/index.html +++ b/pages/index.html @@ -46,8 +46,8 @@

      Measured accuracy

      Errors below are measured at Q16.16 (s15.16). All functions accept any radix — Q16.16 is just the reference point for the table. -See the TDD -report for sweeps at radixes 8, 12, 16, and 24.

      +Run make test-tdd to generate the TDD report +(build/test_tdd_report.md) with sweeps at radixes 8, 12, 16, and 24.

      @@ -131,14 +131,14 @@

      Lean build options

      are most useful when you include the library as a single .c file or static archive without section-level dead-code elimination.

      -

      Why fixed-point, in 2026?

      +

      Why fixed-point?

      -

      Most application code today has an FPU and can use float -freely. But there are still large, interesting corners where -fixed-point pays off:

      +

      Many modern microcontrollers have an FPU and can use float +freely. Older and low-cost MCUs remain common. Fixed-point is often faster and +more deterministic than float, and it excels in situations like:

        -
      • 8- and 16-bit MCUs (AVR, MSP430, 8051, sdcc) where the +
      • 8- and 16-bit MCUs (AVR, MSP430, 8051, SDCC) where the FPU does not exist and even software float is too slow or too large.
      • Hot inner loops on any CPU where a @@ -249,7 +249,7 @@

        Comparison

        FR_Math includes log/ln/log10, wave generators, ADSR, print helpers, and variable radix. CMSIS-DSP estimate is for the math function subset only. See -scripts/crossbuild-docker.sh +scripts/crossbuild_sizes.sh for the build script.

        History

        diff --git a/pages/releases.html b/pages/releases.html index 3321c0c..96e18bb 100644 --- a/pages/releases.html +++ b/pages/releases.html @@ -325,10 +325,9 @@

        Breaking changes

        Test suite

        -

        v2 ships with 42 tests across six test binaries -and a characterization suite (test_tdd.cpp) that pins -numerical behavior to bit-exact reference values. Overall line -coverage is 99% on the library sources.

        +

        v2 ships with a full test suite covering 99% of library +source lines, plus a characterization suite (test_tdd.cpp) +that pins numerical behavior to bit-exact reference values.

        v1.0.3 — 2025

        diff --git a/release_management.md b/release_management.md index 3353e3a..214f544 100644 --- a/release_management.md +++ b/release_management.md @@ -20,7 +20,7 @@ All version-bearing files are kept in sync via | `./scripts/build.sh` | Clean rebuild + run tests (one-shot) | | `./scripts/clean_build.sh` | Wipe `build/` and `coverage/`, recreate them | | `./scripts/coverage_report.sh` (or `make coverage`) | gcov coverage table | -| `./scripts/size_report.sh` (or `make size-report`) | Multi-arch object-size report | +| `./scripts/crossbuild_sizes.sh` (or `make size-report`) | Multi-arch object-size report (Docker) | | `./scripts/sync_version.sh` | Propagate `FR_MATH_VERSION_HEX` to every versioned file | | `./scripts/sync_version.sh --check` | Drift check (non-destructive) | | `./tools/make_release.sh` | Guided release pipeline (validate → PR → merge → tag → publish) | @@ -77,7 +77,7 @@ start, so do not run it inside a session that depends on pre-existing Invoked automatically by `make coverage` and by `tools/make_release.sh`. -### `scripts/size_report.sh` — multi-architecture size report +### `scripts/crossbuild_sizes.sh` — multi-architecture size report Compiles `src/FR_math.c` against every cross-toolchain it can find and prints a formatted table of object sizes. Architectures attempted: @@ -242,7 +242,7 @@ invoked individually. 
| Target | Effect | | --- | --- | -| `make size-report` | Delegates to `scripts/size_report.sh` (multi-arch table) | +| `make size-report` | Delegates to `scripts/crossbuild_sizes.sh` (multi-arch table, Docker) | | `make size-simple` | `size` (or `ls -lh`) on `build/*.o` for the current platform only | ### Clean @@ -290,7 +290,7 @@ loop is: ```bash ./scripts/build.sh # clean rebuild + tests ./scripts/coverage_report.sh # coverage after a change -./scripts/size_report.sh # size after a change +./scripts/crossbuild_sizes.sh # size after a change ``` --- diff --git a/scripts/build.sh b/scripts/build.sh index ade09a4..129c18a 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -75,7 +75,7 @@ echo -e "${GREEN} ok${NC}" # Print host-compiled library sizes so the developer can see how the # objects came out without having to dig in build/. This is host-only; -# for a multi-arch comparison run scripts/size_report.sh. +# for a multi-arch comparison run scripts/crossbuild_sizes.sh. print_host_size() { local host_arch host_arch="$(uname -m 2>/dev/null || echo unknown)" @@ -129,6 +129,6 @@ echo -e "${GREEN}=========================================${NC}" echo "" echo "Next steps:" echo " - ./scripts/coverage_report.sh (coverage analysis)" -echo " - ./scripts/size_report.sh (object file sizes)" +echo " - ./scripts/crossbuild_sizes.sh (object file sizes)" echo " - ./tools/make_release.sh (guided release pipeline)" echo "" diff --git a/src/FR_math.h b/src/FR_math.h index 6991d4f..a2db262 100644 --- a/src/FR_math.h +++ b/src/FR_math.h @@ -53,21 +53,14 @@ extern "C" #include "FR_defs.h" #endif -/* Quick Note on MACRO param wrapping: - * All macro inputs are wrapped in paranthesis in this code. - * eg: #define MACRO_X_SQUARED(x) ((x)*(x)) //<<-- note internal paranthesis - * this is done because macros use true source substitution in C/C++ so a if - * a macro internally uses many operators of mixed precedence e.g. 
>> and * together - * undesired behavior can result if the parameter "passed" in the the macro is a - * a complex contruct e.g. instead of being a value or single variable is a - * something like 3+4*5 --> all of this would gets substituted in to the MACRO - * expression and parans eliminate chances for odd behavior. - * For example: - * MACRO_X_SQUARED_BAD(x) (x*x) - * will expand this way: - * 3+4*5*3+4*5 ==> 3+60+20 == 83 // due to precedence operations whereas - * MACRO_X_SQUARED(x) ((x)*(x)) - * (3+4*5)*(3+4*5) ==> (3+20)*(3+20) == (23)*(23) == 529 +/* Quick note on macro parameter wrapping: + * Arguments are parenthesized in expansions, e.g. + * #define MACRO_X_SQUARED(x) ((x)*(x)) // inner parens around each x + * Macros substitute text as-is. If a parameter is an expression like 3+4*5 + * and the body mixes operators without extra parentheses, precedence errors + * follow. Parenthesize parameters (and fragile subexpressions) in the macro body. + * Example: MACRO_X_SQUARED_BAD(x) (x*x) -> 3+4*5*3+4*5 == 83 (wrong). + * MACRO_X_SQUARED(x) ((x)*(x)) -> (3+4*5)*(3+4*5) == 529 (right). 
*/ /*absolute value for integer and fixed radix types*/ diff --git a/tests/fr_math_test.c b/tests/fr_math_test.c index 4c095cc..346b840 100644 --- a/tests/fr_math_test.c +++ b/tests/fr_math_test.c @@ -93,5 +93,5 @@ int main() else printf("tests failed.\n"); - return result; /* remember the value 0 is considered passing in a travis-ci sense */ + return result; /* remember the value 0 is considered passing in a ci sense */ } \ No newline at end of file From 2622ba66001b72403dbd87db3345d84187edeb27 Mon Sep 17 00:00:00 2001 From: deftio Date: Mon, 4 May 2026 19:48:58 -0700 Subject: [PATCH 7/7] docs and examples update, src clean up, accuracy metrics and tune up --- README.md | 50 +++++++++++++++++++++++----------------------- docs/README.md | 48 ++++++++++++++++++++++---------------------- pages/index.html | 52 ++++++++++++++++++++++++------------------------ 3 files changed, 75 insertions(+), 75 deletions(-) diff --git a/README.md b/README.md index 601e7a3..940bbf8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ [![License](https://img.shields.io/badge/License-BSD%202--Clause-blue.svg)](https://opensource.org/licenses/BSD-2-Clause) [![CI](https://github.com/deftio/fr_math/actions/workflows/ci.yml/badge.svg)](https://github.com/deftio/fr_math/actions/workflows/ci.yml) -[![Coverage](https://img.shields.io/badge/coverage-99%25-brightgreen.svg)](#building-and-testing) +[![Coverage](https://img.shields.io/badge/coverage-96%25-brightgreen.svg)](#building-and-testing) [![Docs](https://img.shields.io/badge/docs-online-blue.svg)](https://deftio.github.io/fr_math/) [![Version](https://img.shields.io/badge/version-2.0.8-blue.svg)](release_notes.md) @@ -28,30 +28,30 @@ radix — Q16.16 is just the reference point for the table. At other radixes (3-bit, 24-bit, etc.) accuracy will differ due to the number of fractional bits available. 
- -| Function | Max err (%)*| Avg err (%) | Note | -|---|---:|---:|---| -| sin/cos (BAM) | 0.1526 | 0.0030 | very fast binary angle trig | -| sin/cos (deg) | 0.1526 | 0.0029 | degree input trig fns | -| sin/cos (rad) | 0.1828 | 0.0033 | radian (traditional) trig | -| tan (BAM) | 0.5823 | 0.0008 | binary angle tangent; ±maxint at poles | -| tan (deg) | 0.5311 | 0.0008 | degree input tangent; saturated at poles | -| tan (rad) | 0.0386 | 0.0001 | radian (traditional) tangent | -| asin / acos | 0.7771 | 0.0280 | reverse trig, radian output | -| atan2 | 0.2564 | 0.0237 | reverse tangent, always safe | -| atan | 0.2425 | 0.0155 | reverse tangent, accepts up to maxint | -| sqrt | 0.0000 | 0.0000 | Round-to-nearest | -| log2 | 0.0116 | 0.0016 | shift/add only for speed | -| pow2 | 0.0018 | 0.0004 | shift/add only for speed | -| ln, log10 | 0.0004 | 0.0000 | shift/add only for speed | -| exp | 0.0003 | 0.0000 | shift/add only for speed | -| exp_fast | 0.0009 | 0.0001 | Shift-only scaling | -| pow10 | 0.0005 | 0.0000 | shift/add only for speed | -| pow10_fast | 0.0022 | 0.0002 | Shift-only scaling | -| hypot (exact) | 0.0000 | 0.0000 | Uses 64-bit intermediate | -| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply | - -*Relative error; reference clamped to 1% of full-scale output. 
+ +| Function | Max err (%)*| Avg err (%) | Note | +|---|---:|---:|---| +| sin/cos (BAM) | 0.1526 | 0.0030 | very fast binary angle trig | +| sin/cos (deg) | 0.1526 | 0.0029 | degree input trig fns | +| sin/cos (rad) | 0.1828 | 0.0033 | radian (traditional) trig | +| tan (BAM) | 0.5823 | 0.0008 | binary angle tangent; ±maxint at poles | +| tan (deg) | 0.5311 | 0.0008 | degree input tangent; saturated at poles | +| tan (rad) | 0.0386 | 0.0001 | radian (traditional) tangent | +| asin / acos | 0.7771 | 0.0280 | reverse trig, radian output | +| atan2 | 0.2564 | 0.0237 | reverse tangent, always safe | +| atan | 0.2425 | 0.0155 | reverse tangent, accepts up to maxint | +| sqrt | 0.0000 | 0.0000 | Round-to-nearest | +| log2 | 0.0116 | 0.0016 | shift/add only for speed | +| pow2 | 0.0018 | 0.0004 | shift/add only for speed | +| ln, log10 | 0.0004 | 0.0000 | shift/add only for speed | +| exp | 0.0003 | 0.0000 | shift/add only for speed | +| exp_fast | 0.0009 | 0.0001 | Shift-only scaling | +| pow10 | 0.0005 | 0.0000 | shift/add only for speed | +| pow10_fast | 0.0022 | 0.0002 | Shift-only scaling | +| hypot (exact) | 0.0000 | 0.0000 | Uses 64-bit intermediate | +| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply | + +*Relative error; reference clamped to 1% of full-scale output. ### What's in the box diff --git a/docs/README.md b/docs/README.md index b0984f5..636de4a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -45,30 +45,30 @@ radix — Q16.16 is just the reference point for the table. See the TDD report (run `make test-tdd` to generate `build/test_tdd_report.md`) for sweeps at radixes 8, 12, 16, and 24. 
- -| Function | Max err (%)*| Avg err (%) | Note | -|---|---:|---:|---| -| sin/cos (BAM) | 0.1526 | 0.0030 | very fast binary angle trig | -| sin/cos (deg) | 0.1526 | 0.0029 | degree input trig fns | -| sin/cos (rad) | 0.1828 | 0.0033 | radian (traditional) trig | -| tan (BAM) | 0.5823 | 0.0008 | binary angle tangent; ±maxint at poles | -| tan (deg) | 0.5311 | 0.0008 | degree input tangent; saturated at poles | -| tan (rad) | 0.0386 | 0.0001 | radian (traditional) tangent | -| asin / acos | 0.7771 | 0.0280 | reverse trig, radian output | -| atan2 | 0.2564 | 0.0237 | reverse tangent, always safe | -| atan | 0.2425 | 0.0155 | reverse tangent, accepts up to maxint | -| sqrt | 0.0000 | 0.0000 | Round-to-nearest | -| log2 | 0.0116 | 0.0016 | shift/add only for speed | -| pow2 | 0.0018 | 0.0004 | shift/add only for speed | -| ln, log10 | 0.0004 | 0.0000 | shift/add only for speed | -| exp | 0.0003 | 0.0000 | shift/add only for speed | -| exp_fast | 0.0009 | 0.0001 | Shift-only scaling | -| pow10 | 0.0005 | 0.0000 | shift/add only for speed | -| pow10_fast | 0.0022 | 0.0002 | Shift-only scaling | -| hypot (exact) | 0.0000 | 0.0000 | Uses 64-bit intermediate | -| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply | - -*Relative error; reference clamped to 1% of full-scale output. 
+ +| Function | Max err (%)*| Avg err (%) | Note | +|---|---:|---:|---| +| sin/cos (BAM) | 0.1526 | 0.0030 | very fast binary angle trig | +| sin/cos (deg) | 0.1526 | 0.0029 | degree input trig fns | +| sin/cos (rad) | 0.1828 | 0.0033 | radian (traditional) trig | +| tan (BAM) | 0.5823 | 0.0008 | binary angle tangent; ±maxint at poles | +| tan (deg) | 0.5311 | 0.0008 | degree input tangent; saturated at poles | +| tan (rad) | 0.0386 | 0.0001 | radian (traditional) tangent | +| asin / acos | 0.7771 | 0.0280 | reverse trig, radian output | +| atan2 | 0.2564 | 0.0237 | reverse tangent, always safe | +| atan | 0.2425 | 0.0155 | reverse tangent, accepts up to maxint | +| sqrt | 0.0000 | 0.0000 | Round-to-nearest | +| log2 | 0.0116 | 0.0016 | shift/add only for speed | +| pow2 | 0.0018 | 0.0004 | shift/add only for speed | +| ln, log10 | 0.0004 | 0.0000 | shift/add only for speed | +| exp | 0.0003 | 0.0000 | shift/add only for speed | +| exp_fast | 0.0009 | 0.0001 | Shift-only scaling | +| pow10 | 0.0005 | 0.0000 | shift/add only for speed | +| pow10_fast | 0.0022 | 0.0002 | Shift-only scaling | +| hypot (exact) | 0.0000 | 0.0000 | Uses 64-bit intermediate | +| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply | + +*Relative error; reference clamped to 1% of full-scale output. ## What's in the box diff --git a/pages/index.html b/pages/index.html index 9713405..b449193 100644 --- a/pages/index.html +++ b/pages/index.html @@ -49,32 +49,32 @@

        Measured accuracy

        Run make test-tdd to generate the TDD report (build/test_tdd_report.md) with sweeps at radixes 8, 12, 16, and 24.

        - -
      - - - - - - - - - - - - - - - - - - - - - - -
      | Function | Max err (%)* | Avg err (%) | Note |
      | sin/cos (BAM) | 0.1526 | 0.0030 | very fast binary angle trig |
      | sin/cos (deg) | 0.1526 | 0.0029 | degree input trig fns |
      | sin/cos (rad) | 0.1828 | 0.0033 | radian (traditional) trig |
      | tan (BAM) | 0.5823 | 0.0008 | binary angle tangent; ±maxint at poles |
      | tan (deg) | 0.5311 | 0.0008 | degree input tangent; saturated at poles |
      | tan (rad) | 0.0386 | 0.0001 | radian (traditional) tangent |
      | asin / acos | 0.7771 | 0.0280 | reverse trig, radian output |
      | atan2 | 0.2564 | 0.0237 | reverse tangent, always safe |
      | atan | 0.2425 | 0.0155 | reverse tangent, accepts up to maxint |
      | sqrt | 0.0000 | 0.0000 | Round-to-nearest |
      | log2 | 0.0116 | 0.0016 | shift/add only for speed |
      | pow2 | 0.0018 | 0.0004 | shift/add only for speed |
      | ln, log10 | 0.0004 | 0.0000 | shift/add only for speed |
      | exp | 0.0003 | 0.0000 | shift/add only for speed |
      | exp_fast | 0.0009 | 0.0001 | Shift-only scaling |
      | pow10 | 0.0005 | 0.0000 | shift/add only for speed |
      | pow10_fast | 0.0022 | 0.0002 | Shift-only scaling |
      | hypot (exact) | 0.0000 | 0.0000 | Uses 64-bit intermediate |
      | hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply |
      -

      *Relative error; reference clamped to 1% of full-scale output.

      + + + + + + + + + + + + + + + + + + + + + + + + +
      | Function | Max err (%)* | Avg err (%) | Note |
      | sin/cos (BAM) | 0.1526 | 0.0030 | very fast binary angle trig |
      | sin/cos (deg) | 0.1526 | 0.0029 | degree input trig fns |
      | sin/cos (rad) | 0.1828 | 0.0033 | radian (traditional) trig |
      | tan (BAM) | 0.5823 | 0.0008 | binary angle tangent; ±maxint at poles |
      | tan (deg) | 0.5311 | 0.0008 | degree input tangent; saturated at poles |
      | tan (rad) | 0.0386 | 0.0001 | radian (traditional) tangent |
      | asin / acos | 0.7771 | 0.0280 | reverse trig, radian output |
      | atan2 | 0.2564 | 0.0237 | reverse tangent, always safe |
      | atan | 0.2425 | 0.0155 | reverse tangent, accepts up to maxint |
      | sqrt | 0.0000 | 0.0000 | Round-to-nearest |
      | log2 | 0.0116 | 0.0016 | shift/add only for speed |
      | pow2 | 0.0018 | 0.0004 | shift/add only for speed |
      | ln, log10 | 0.0004 | 0.0000 | shift/add only for speed |
      | exp | 0.0003 | 0.0000 | shift/add only for speed |
      | exp_fast | 0.0009 | 0.0001 | Shift-only scaling |
      | pow10 | 0.0005 | 0.0000 | shift/add only for speed |
      | pow10_fast | 0.0022 | 0.0002 | Shift-only scaling |
      | hypot (exact) | 0.0000 | 0.0000 | Uses 64-bit intermediate |
      | hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply |
      +

      *Relative error; reference clamped to 1% of full-scale output.

      What’s in the box