diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index c5765cd..bea9935 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -99,3 +99,27 @@ jobs: body_path: /tmp/release_notes.md draft: false prerelease: false + + publish-pio: + needs: release + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install PlatformIO + run: pip install platformio + - name: Publish to PlatformIO Registry + env: + PLATFORMIO_AUTH_TOKEN: ${{ secrets.PLATFORMIO_AUTH_TOKEN }} + run: pio pkg publish . --no-interactive + + publish-espressif: + needs: release + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install compote (ESP Component Manager) + run: pip install idf-component-manager + - name: Publish to Espressif Component Registry + env: + IDF_COMPONENT_API_TOKEN: ${{ secrets.IDF_COMPONENT_API_TOKEN }} + run: compote component upload --name fr_math --namespace deftio diff --git a/.gitignore b/.gitignore index 9be713d..10a7a12 100644 --- a/.gitignore +++ b/.gitignore @@ -56,8 +56,8 @@ htmlcov/ .idea/ *.sublime-* -# Claude Code local files -CLAUDE.local.md +# Claude (Anthropic) project-local files — not part of the library +CLAUDE*.md .claude/ # OS files diff --git a/README.md b/README.md index 42982fc..940bbf8 100644 --- a/README.md +++ b/README.md @@ -1,98 +1,104 @@ [![License](https://img.shields.io/badge/License-BSD%202--Clause-blue.svg)](https://opensource.org/licenses/BSD-2-Clause) [![CI](https://github.com/deftio/fr_math/actions/workflows/ci.yml/badge.svg)](https://github.com/deftio/fr_math/actions/workflows/ci.yml) -[![Coverage](https://img.shields.io/badge/coverage-98%25-brightgreen.svg)](#building-and-testing) +[![Coverage](https://img.shields.io/badge/coverage-96%25-brightgreen.svg)](#building-and-testing) [![Docs](https://img.shields.io/badge/docs-online-blue.svg)](https://deftio.github.io/fr_math/) -[![Version](https://img.shields.io/badge/version-2.0.7-blue.svg)](release_notes.md) - +[![Version](https://img.shields.io/badge/version-2.0.8-blue.svg)](release_notes.md) + [![PlatformIO](https://img.shields.io/badge/PlatformIO-library-teal.svg)](https://registry.platformio.org/libraries/deftio/fr_math) [![Arduino](https://img.shields.io/badge/Arduino-library-teal.svg)](https://github.com/deftio/fr_math) [![ESP Component](https://img.shields.io/badge/ESP--IDF-component-teal.svg)](https://components.espressif.com/components/deftio/fr_math) - # FR_Math: A C Language Fixed-Point Math Library for Embedded Systems -FR_Math is a compact, integer-only fixed-point math library built for -systems where floating point is too slow, too big, or unavailable. Designed for embedded targets ranging from -legacy 16 MHz 68k processors to modern Cortex-M and RISC-V cores, it -provides a full suite of math primitives — trigonometry, logarithms, -roots, transforms, and signal generators — while remaining -deterministic, portable, and small. Unlike traditional fixed-point -libraries, FR_Math lets the caller choose the binary point per -operation, trading precision and range explicitly instead of locking -into a single format. Pure C (C99/C11/C17) with an optional C++ -2D-transform wrapper. Compiles under Arduino. Zero dependencies -beyond ``. +See: **[Documentation & Guide](https://deftio.github.io/fr_math/)** — for API reference, examples, fixed-point primer, build instructions. + + +**FR_Math** is a compact, integer-only fixed-point math library built for systems where floating point is too slow, too big, or unavailable. Designed for embedded targets ranging from legacy 16 MHz 68k processors to modern Cortex-M and RISC-V cores, it provides a full suite of math primitives — trigonometry, logarithms, roots, transforms, and signal generators — while remaining deterministic, portable, and small. + +Unlike most fixed-point libraries, FR_Math lets the caller choose the binary point (radix) per operation, trading precision and range explicitly instead of locking into a single format. Pure C with C++ wrappers — compiles cleanly under C99, C11, C17, C++11 and later. +Compiles under Arduino, PlatformIO, Espressif, many older embedded targets. +Zero dependencies beyond ``. + ### Measured accuracy Errors below are measured at Q16.16 (s15.16). All functions accept any radix — Q16.16 is just the reference point for the table. -Percent errors skip expected values near zero (|expected| < 0.01). At other radixes (3-bit, 24-bit, etc.) accuracy will differ due to the -number of fractional bits available. All functions support radix 0 to 30. - - -| Function | Max err (%) | Avg err (%) | Note | -|---|---:|---:|---| -| sin / cos | 0.7169 | 0.0100 | 65536-pt sweep + specials | -| tan | 0.7118 | 0.0162 | 65536-pt sweep (skip poles) | -| asin / acos | 0.7025 | 0.0105 | 65536-pt; sqrt approx near boundary | -| atan2 | 0.4953 | 0.0268 | 65536x5 radii; asin/acos+hypot_fast8 | -| atan | 0.2985 | 0.0159 | 20001-pt sweep [-10,10]; via FR_atan2 | -| sqrt | 0.0003 | 0.0000 | Round-to-nearest | -| log2 | 0.2479 | 0.0045 | 65-entry mantissa table | -| pow2 | 0.1373 | 0.0057 | 65-entry fraction table | -| ln, log10 | 0.0015 | 0.0004 | Via FR_MULK28 from log2 | -| exp | 0.0719 | 0.0051 | FR_MULK28 + FR_pow2 | -| exp_fast | 0.0719 | 0.0064 | Shift-only scaling | -| pow10 | 0.1163 | 0.0075 | FR_MULK28 + FR_pow2 | -| pow10_fast | 0.1163 | 0.0100 | Shift-only scaling | -| hypot (exact) | 0.0001 | 0.0000 | 64-bit intermediate | -| hypot_fast8 (8-seg) | 0.0977 | 0.0508 | Shift-only, no multiply | +number of fractional bits available. + + +| Function | Max err (%)*| Avg err (%) | Note | +|---|---:|---:|---| +| sin/cos (BAM) | 0.1526 | 0.0030 | very fast binary angle trig | +| sin/cos (deg) | 0.1526 | 0.0029 | degree input trig fns | +| sin/cos (rad) | 0.1828 | 0.0033 | radian (traditional) trig | +| tan (BAM) | 0.5823 | 0.0008 | binary angle tangent; ±maxint at poles | +| tan (deg) | 0.5311 | 0.0008 | degree input tangent; saturated at poles | +| tan (rad) | 0.0386 | 0.0001 | radian (traditional) tangent | +| asin / acos | 0.7771 | 0.0280 | reverse trig, radian output | +| atan2 | 0.2564 | 0.0237 | reverse tangent, always safe | +| atan | 0.2425 | 0.0155 | reverse tangent, accepts up to maxint | +| sqrt | 0.0000 | 0.0000 | Round-to-nearest | +| log2 | 0.0116 | 0.0016 | shift/add only for speed | +| pow2 | 0.0018 | 0.0004 | shift/add only for speed | +| ln, log10 | 0.0004 | 0.0000 | shift/add only for speed | +| exp | 0.0003 | 0.0000 | shift/add only for speed | +| exp_fast | 0.0009 | 0.0001 | Shift-only scaling | +| pow10 | 0.0005 | 0.0000 | shift/add only for speed | +| pow10_fast | 0.0022 | 0.0002 | Shift-only scaling | +| hypot (exact) | 0.0000 | 0.0000 | Uses 64-bit intermediate | +| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply | + +*Relative error; reference clamped to 1% of full-scale output. - + ### What's in the box | Area | Functions | -|---|---| -| Arithmetic | `FR_ADD`, `FR_SUB`, `FR_DIV`, `FR_DIV32`, `FR_MOD`, `FR_FixMuls`, `FR_FixMulSat`, `FR_CHRDX` | -| Utility | `FR_MIN`, `FR_MAX`, `FR_CLAMP`, `FR_ABS`, `FR_SGN` | -| Trig (integer deg) | `FR_Sin`, `FR_Cos`, `FR_Tan`, `FR_SinI`, `FR_CosI`, `FR_TanI` | -| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_sin_deg`, `fr_cos_deg` | -| Inverse trig | `FR_atan`, `FR_atan2`, `FR_asin`, `FR_acos` | -| Log / exp | `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`, `FR_EXP_FAST`, `FR_POW10_FAST`, `FR_MULK28` | -| Roots | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` | -| Wave generators | `fr_wave_sqr`, `fr_wave_pwm`, `fr_wave_tri`, `fr_wave_saw`, `fr_wave_tri_morph`, `fr_wave_noise` | -| Envelope | `fr_adsr_init`, `fr_adsr_trigger`, `fr_adsr_release`, `fr_adsr_step` | -| 2D transforms | `FR_Matrix2D_CPT` (mul, add, sub, det, inv, setrotate, XFormPtI, XFormPtI16) | -| Formatted output | `FR_printNumD`, `FR_printNumF`, `FR_printNumH`, `FR_numstr` | - -### Library size (FR_math.c only, `-Os`) - -Compiled object code sizes on select platforms (static test build). Your -sizes may vary depending on optimization and linker settings. Sizes -include all code and internal tables; everything is ROMable. +| --- | --- | +| Arithmetic | FR_ADD, FR_SUB, FR_DIV, FR_DIV32, FR_MOD, FR_FixMuls, FR_FixMulSat, FR_CHRDX | +| Utility | FR_MIN, FR_MAX, FR_CLAMP, FR_ABS, FR_SGN | +| Trig (degree) | fr_sin_deg, fr_cos_deg, fr_tan_deg, FR_SinI, FR_CosI, FR_TanI | +| Trig (radian/BAM) | fr_sin, fr_cos, fr_tan, fr_sin_bam, fr_cos_bam, fr_tan_bam | +| Inverse trig | FR_atan, FR_atan2, FR_asin, FR_acos | +| Log / exp | FR_log2, FR_ln, FR_log10, FR_pow2, FR_EXP, FR_POW10, FR_EXP_FAST, FR_POW10_FAST, FR_MULK28 | +| Roots | FR_sqrt, FR_hypot, FR_hypot_fast8 | +| Wave generators | fr_wave_sqr, fr_wave_pwm, fr_wave_tri, fr_wave_saw, fr_wave_tri_morph, fr_wave_noise | +| Envelope | fr_adsr_init, fr_adsr_trigger, fr_adsr_release, fr_adsr_step | +| 2D transforms | FR_Matrix2D_CPT (mul, add, sub, det, inv, setrotate, XFormPtI, XFormPtI16) | +| Formatted output | FR_printNumD, FR_printNumF, FR_printNumH, FR_numstr | + +### Compiled library size (FR_math.c only, `-Os`) + +.text section sizes, all code + internal tables, ROMable. Sorted 8-bit → 64-bit. -| Target | Core | Full | -|--------|-----:|-----:| -| RP2040 (Cortex-M0+) | 2.6 KB | 4.2 KB | -| STM32 (Cortex-M4) | 2.6 KB | 4.2 KB | -| RISC-V 32 (rv32imac) | 3.0 KB | 4.7 KB | -| ESP32 (Xtensa) | 3.5 KB | 5.2 KB | -| 68k | 3.5 KB | 5.3 KB | -| x86-64 (GCC) | 3.5 KB | 5.7 KB | -| x86-32 | 4.5 KB | 6.8 KB | -| MSP430 (16-bit) | 5.9 KB | 8.9 KB | -| 68HC11 | 10.8 KB | 16.0 KB | -| AVR (ATmega328P) | 7.0 KB | 10.6 KB | +| Target | Lean | Core | Full | +| --- | ---:| ---:| ---:| +| AVR ATmega328P (8-bit) | 9.2 KB | 12.8 KB | 15.4 KB | +| 68HC11 (8-bit) | 13.3 KB | 18.4 KB | 22.6 KB | +| MSP430 (16-bit) | 7.8 KB | 10.7 KB | 12.8 KB | +| Xtensa LX7 (ESP32-S3) | 2.9 KB | 4.2 KB | 5.3 KB | +| Cortex-M4 (32-bit) | 3.3 KB | 4.4 KB | 5.5 KB | +| Cortex-M0 (32-bit) | 3.4 KB | 4.5 KB | 5.7 KB | +| RISC-V rv32 | 4.1 KB | 5.5 KB | 6.8 KB | +| Xtensa LX106 (ESP8266) | 4.2 KB | 5.8 KB | 7.3 KB | +| m68k (32-bit) | 4.4 KB | 6.2 KB | 7.8 KB | +| MIPS32 | 4.7 KB | 6.6 KB | 8.7 KB | +| x86-32 | 5.3 KB | 7.2 KB | 9.2 KB | +| RISC-V rv64 | 4.0 KB | 5.5 KB | 6.8 KB | +| x86-64 (GCC) | 4.6 KB | 6.1 KB | 8.0 KB | +| AArch64 (ARM64) | 4.8 KB | 6.6 KB | 8.7 KB | -Core = compiled with `-DFR_CORE_ONLY` (math only, no print, no waves). -The optional 2D module adds ~1 KB. -\* MSP430, 68HC11, and AVR are 8/16-bit — every 32-bit operation expands to multiple instructions. -See [`docker/`](docker/) for the cross-compile setup. +**Lean** (`-DFR_LEAN -DFR_NO_PRINT`): radian trig, inv trig, log/exp, sqrt. +**Core** (`-DFR_CORE_ONLY`): Lean + degree/BAM trig, log10, hypot. +**Full** (default): Core + formatted print, wave generators, ADSR envelope. +Optional C++ 2D module adds ~1 KB. +8/16-bit targets expand some 32-bit op to multiple instructions — hence the larger sizes. +See [Building & Testing](docs/building.md) for the full cross-compile setup. ### Lean build options @@ -101,10 +107,10 @@ for ROM-constrained targets. Define them before including `FR_math.h` (or pass `-D` on the compiler command line): | Define | What it removes | Typical savings | -|---|---|---| -| `FR_CORE_ONLY` | Everything below (print + waves) | ~1.9 KB | -| `FR_NO_PRINT` | `FR_printNumF`, `FR_printNumD`, `FR_printNumH`, `FR_numstr` | ~1.3 KB | -| `FR_NO_WAVES` | `fr_wave_*` (6 shapes), `fr_adsr_*` (ADSR envelope), `FR_HZ2BAM_INC` | ~0.6 KB | +| --- | --- | --- | +| FR_CORE_ONLY | Everything below (print + waves) | ~1.9 KB | +| FR_NO_PRINT | FR_printNumF, FR_printNumD, FR_printNumH, FR_numstr | ~1.3 KB | +| FR_NO_WAVES | fr_wave_* (6 shapes), fr_adsr_* (ADSR envelope), FR_HZ2BAM_INC | ~0.6 KB | `FR_CORE_ONLY` is a convenience shorthand that defines both `FR_NO_PRINT` and `FR_NO_WAVES` in one step. @@ -129,7 +135,7 @@ make lib # build static library make test # run all tests (unit, TDD characterization, 2D) ``` -## Quick taste +## Example ```c #include "FR_math.h" @@ -162,18 +168,23 @@ s32 two = I2FR(2, R); /* 2.0 → raw 131072 */ * * MixedCase FR_ names are functions — they contain loops, tables, or * multi-step algorithms where inlining would waste ROM: - * FR_Cos, FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ... + * FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ... * - * lowercase fr_ names are v2 functions (radian trig, wave generators, - * ADSR envelopes): - * fr_sin, fr_cos, fr_tan, fr_wave_tri, fr_adsr_step ... + * lowercase fr_ names are v2 functions (degree/radian/BAM trig, wave + * generators, ADSR envelopes): + * fr_sin_deg, fr_cos_deg, fr_tan_deg, fr_sin, fr_cos, fr_tan, + * fr_wave_tri, fr_adsr_step ... + * + * Legacy aliases: FR_Cos, FR_Sin, FR_Tan still work — they are + * macros that map to fr_cos_deg, fr_sin_deg, fr_tan_deg. New code + * should use the fr_ names directly. * * Some macros wrap functions: FR_EXP(x,r) scales x then calls * FR_pow2 — one-liner convenience, heavy lifting in the function. */ /* ---- Math functions ---- */ -s32 c45 = FR_Cos(45, 0); /* cos(45°) = 0.7071 */ +s32 c45 = fr_cos_deg(45, 0); /* cos(45°) = 0.7071 */ s32 s30 = fr_sin(FR_numstr("0.5236", R), R); /* sin(0.5236 rad) */ s32 root2 = FR_sqrt(two, R); /* sqrt(2) = 1.4142 */ s32 angle = FR_atan2(I2FR(1,R), I2FR(1,R), R); /* atan2(1,1) rad */ @@ -206,23 +217,25 @@ The full docs ship in two forms — pick whichever fits how you read. **Terminal / editor (plain markdown):** - [docs/README.md](docs/README.md) — same content as plain markdown. - - [getting-started.md](docs/getting-started.md) | [fixed-point-primer.md](docs/fixed-point-primer.md) | [api-reference.md](docs/api-reference.md) - - [examples.md](docs/examples.md) | [building.md](docs/building.md) | [releases.md](docs/releases.md) +- [getting-started.md](docs/getting-started.md) | [fixed-point-primer.md](docs/fixed-point-primer.md) | [api-reference.md](docs/api-reference.md) +- [examples.md](docs/examples.md) | [building.md](docs/building.md) | [releases.md](docs/releases.md) ## History FR_Math has been in service since 2000, originally built for graphics transforms on 16 MHz 68k Palm Pilots. It shipped inside Trumpetsoft's *Inkstorm* on PalmOS, then moved forward through ARM, x86, MIPS, -RISC-V, and various 8/16-bit embedded targets. v2.0.7 is the current -release with a full test suite, bit-exact numerical specification, and -CI on every push. - +RISC-V, and various 8/16-bit embedded targets. +The current release now has a full test suite, numerical specification, and +CI on every push and better documentation. + ## License BSD-2-Clause — see [LICENSE.txt](LICENSE.txt). (c) 2000-2026 M. Chatterjee +PRs and suggestions are welcome.  Please be detailed as embedded systems can involve many tradeoffs. + ## For AI coding agents - [llms.txt](llms.txt) — machine-readable API summary @@ -230,5 +243,6 @@ BSD-2-Clause — see [LICENSE.txt](LICENSE.txt). ## Version -2.0.7 — see [release_notes.md](release_notes.md) for the v1 → v2 +See [release_notes.md](release_notes.md) for the v1 → v2 migration guide, numerical fixes, and new functionality. + diff --git a/VERSION b/VERSION index f1547e6..815e68d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.7 +2.0.8 diff --git a/agents.md b/agents.md index 2a461f6..30128db 100644 --- a/agents.md +++ b/agents.md @@ -14,16 +14,15 @@ Integer-only, zero dependencies, caller-selectable radix (binary point). ``` src/ Core library (this is what ships) FR_math.h Public API — all macros, function declarations, constants - FR_math.c All function implementations + FR_math.c All function implementations (trig tables inlined) FR_defs.h Type aliases (s8, s16, s32, u8, u16, u32) - FR_trig_table.h Precomputed sine table FR_math_2D.h/.cpp Optional C++ 2D transform class tests/ Test suite (7 programs, run via `make test`) examples/ Arduino .ino sketches + POSIX example docs/ Markdown documentation pages/ HTML documentation (mirrors docs/) -scripts/ Build, release, version sync helpers +scripts/ Build, release, version sync, size report helpers tools/ Coefficient generators (Python, C++) dev/ Development notes and planning (not shipped) ``` @@ -32,8 +31,10 @@ dev/ Development notes and planning (not shipped) ```bash make lib # compile library objects -make test # run all 7 test suites (27+ tests) -make examples # build POSIX example +make test # run full test suite (99% line coverage) +make examples # build example programs +make size-report # cross-compile size report (Docker) +make size-update # size report + patch doc files make clean # remove build artifacts ``` @@ -86,9 +87,11 @@ Versioned files (all synced automatically): 1. Bump `FR_MATH_VERSION_HEX` in `src/FR_math.h` 2. Run `./scripts/sync_version.sh` -3. Run `./tools/make_release.sh` (full validation gate) -4. Verify `llms.txt` and `agents.md` are current with any API changes -5. Commit, tag, push +3. Run `./scripts/crossbuild_sizes.sh --update` (rebuild size tables) +4. Run `./scripts/accuracy_report.sh --update` (rebuild accuracy tables) +5. Run `./tools/make_release.sh` (full validation gate) +6. Verify `llms.txt` and `agents.md` are current with any API changes +7. Commit, tag, push ## Lean build options @@ -96,13 +99,14 @@ Define before including `FR_math.h` to exclude optional subsystems: | Define | Removes | Savings | |---|---|---| +| `FR_CORE_ONLY` | Print + waves (shorthand for both below) | ~1.9 KB | | `FR_NO_PRINT` | `FR_printNumF/D/H`, `FR_numstr` | ~1.3 KB | | `FR_NO_WAVES` | `fr_wave_*`, `fr_adsr_*`, `FR_HZ2BAM_INC` | ~0.6 KB | ## Platform targets The library compiles on: AVR (Arduino), ARM Cortex-M0/M4, ESP32, -RISC-V, x86/x64, MSP430, 68k, 8051. Code is 4–8 KB at `-Os` on +RISC-V, x86/x64, MSP430, m68k, PowerPC, MIPS32, 68HC11. Code is 3–9 KB at `-Os` on 32-bit targets. ## Library publishing diff --git a/compare_lfm/comparison_results.json b/compare_lfm/comparison_results.json index adf0019..76b25d9 100644 --- a/compare_lfm/comparison_results.json +++ b/compare_lfm/comparison_results.json @@ -13,19 +13,19 @@ "double_reference": "std::sin", "sweep": "65536-pt, [-pi, +pi]", "speed": { - "fr_math_ns_per_call": 2.6, - "libfixmath_ns_per_call": 20.7, - "fr_math_speedup": 7.94, + "fr_math_ns_per_call": 7.4, + "libfixmath_ns_per_call": 12.7, + "fr_math_speedup": 1.72, "faster": "fr_math" }, "accuracy_vs_double": { "fr_math": { - "max_abs_error": 1.34165039e-04, - "mean_abs_error": 4.23947344e-05, - "max_error_lsb": 8.8, - "mean_error_lsb": 2.8, - "max_rel_error_pct": 1.0615, - "mean_rel_error_pct": 0.0158 + "max_abs_error": 7.40851348e-05, + "mean_abs_error": 1.88938357e-05, + "max_error_lsb": 4.9, + "mean_error_lsb": 1.2, + "max_rel_error_pct": 0.4816, + "mean_rel_error_pct": 0.0081 }, "libfixmath": { "max_abs_error": 7.74511497e-03, @@ -43,19 +43,19 @@ "double_reference": "std::cos", "sweep": "65536-pt, [-pi, +pi]", "speed": { - "fr_math_ns_per_call": 4.8, - "libfixmath_ns_per_call": 18.4, - "fr_math_speedup": 3.86, + "fr_math_ns_per_call": 9.9, + "libfixmath_ns_per_call": 13.2, + "fr_math_speedup": 1.34, "faster": "fr_math" }, "accuracy_vs_double": { "fr_math": { - "max_abs_error": 1.25349009e-04, - "mean_abs_error": 4.65658208e-05, - "max_error_lsb": 8.2, - "mean_error_lsb": 3.1, - "max_rel_error_pct": 0.9018, - "mean_rel_error_pct": 0.0161 + "max_abs_error": 6.76591807e-05, + "mean_abs_error": 2.03740409e-05, + "max_error_lsb": 4.4, + "mean_error_lsb": 1.3, + "max_rel_error_pct": 0.3282, + "mean_rel_error_pct": 0.0077 }, "libfixmath": { "max_abs_error": 7.75591931e-03, @@ -73,19 +73,19 @@ "double_reference": "std::tan", "sweep": "65536-pt, [-1.2, 1.2] rad", "speed": { - "fr_math_ns_per_call": 6.0, - "libfixmath_ns_per_call": 41.4, - "fr_math_speedup": 6.89, + "fr_math_ns_per_call": 14.1, + "libfixmath_ns_per_call": 37.2, + "fr_math_speedup": 2.64, "faster": "fr_math" }, "accuracy_vs_double": { "fr_math": { - "max_abs_error": 8.49384425e-04, - "mean_abs_error": 1.04510886e-04, - "max_error_lsb": 55.7, - "mean_error_lsb": 6.8, - "max_rel_error_pct": 1.0080, - "mean_rel_error_pct": 0.0228 + "max_abs_error": 5.06554437e-04, + "mean_abs_error": 5.84009618e-05, + "max_error_lsb": 33.2, + "mean_error_lsb": 3.8, + "max_rel_error_pct": 0.5850, + "mean_rel_error_pct": 0.0122 }, "libfixmath": { "max_abs_error": 1.82495961e-02, @@ -104,19 +104,19 @@ "double_reference": "std::asin", "sweep": "65536-pt, [-0.999, 0.999]", "speed": { - "fr_math_ns_per_call": 11.5, - "libfixmath_ns_per_call": 53.7, - "fr_math_speedup": 4.67, + "fr_math_ns_per_call": 11.9, + "libfixmath_ns_per_call": 64.0, + "fr_math_speedup": 5.38, "faster": "fr_math" }, "accuracy_vs_double": { "fr_math": { - "max_abs_error": 4.76933520e-04, - "mean_abs_error": 4.37641042e-05, - "max_error_lsb": 31.3, - "mean_error_lsb": 2.9, - "max_rel_error_pct": 0.5795, - "mean_rel_error_pct": 0.0134 + "max_abs_error": 3.79872462e-04, + "mean_abs_error": 1.04994412e-04, + "max_error_lsb": 24.9, + "mean_error_lsb": 6.9, + "max_rel_error_pct": 1.9776, + "mean_rel_error_pct": 0.0477 }, "libfixmath": { "max_abs_error": 1.01788963e-02, @@ -134,19 +134,19 @@ "double_reference": "std::acos", "sweep": "65536-pt, [-0.999, 0.999]", "speed": { - "fr_math_ns_per_call": 8.4, - "libfixmath_ns_per_call": 50.4, - "fr_math_speedup": 5.97, + "fr_math_ns_per_call": 11.1, + "libfixmath_ns_per_call": 65.0, + "fr_math_speedup": 5.88, "faster": "fr_math" }, "accuracy_vs_double": { "fr_math": { - "max_abs_error": 4.72479065e-04, - "mean_abs_error": 4.33857475e-05, - "max_error_lsb": 31.0, - "mean_error_lsb": 2.8, - "max_rel_error_pct": 0.5194, - "mean_rel_error_pct": 0.0056 + "max_abs_error": 3.75418007e-04, + "mean_abs_error": 1.00708880e-04, + "max_error_lsb": 24.6, + "mean_error_lsb": 6.6, + "max_rel_error_pct": 0.2724, + "mean_rel_error_pct": 0.0093 }, "libfixmath": { "max_abs_error": 1.01897006e-02, @@ -164,17 +164,17 @@ "double_reference": "std::atan", "sweep": "65536-pt, [-50, 50]", "speed": { - "fr_math_ns_per_call": 8.0, - "libfixmath_ns_per_call": 11.2, - "fr_math_speedup": 1.41, + "fr_math_ns_per_call": 10.8, + "libfixmath_ns_per_call": 14.8, + "fr_math_speedup": 1.36, "faster": "fr_math" }, "accuracy_vs_double": { "fr_math": { - "max_abs_error": 9.57408985e-04, - "mean_abs_error": 7.37662492e-05, - "max_error_lsb": 62.7, - "mean_error_lsb": 4.8, + "max_abs_error": 9.14677954e-04, + "mean_abs_error": 7.43583969e-05, + "max_error_lsb": 59.9, + "mean_error_lsb": 4.9, "max_rel_error_pct": 0.2149, "mean_rel_error_pct": 0.0061 }, @@ -194,19 +194,19 @@ "double_reference": "std::atan2", "sweep": "65536-pt, 5 radii x 360 deg", "speed": { - "fr_math_ns_per_call": 15.9, - "libfixmath_ns_per_call": 10.5, + "fr_math_ns_per_call": 20.8, + "libfixmath_ns_per_call": 13.7, "fr_math_speedup": 0.66, "faster": "libfixmath" }, "accuracy_vs_double": { "fr_math": { - "max_abs_error": 9.70679332e-04, - "mean_abs_error": 2.15170870e-04, - "max_error_lsb": 63.6, - "mean_error_lsb": 14.1, + "max_abs_error": 9.53437855e-04, + "mean_abs_error": 1.91371871e-04, + "max_error_lsb": 62.5, + "mean_error_lsb": 12.5, "max_rel_error_pct": 0.4122, - "mean_rel_error_pct": 0.0258 + "mean_rel_error_pct": 0.0239 }, "libfixmath": { "max_abs_error": 1.01728729e-02, @@ -225,9 +225,9 @@ "double_reference": "std::sqrt", "sweep": "65536-pt, [0.01, 100]", "speed": { - "fr_math_ns_per_call": 18.6, - "libfixmath_ns_per_call": 19.8, - "fr_math_speedup": 1.06, + "fr_math_ns_per_call": 24.8, + "libfixmath_ns_per_call": 26.2, + "fr_math_speedup": 1.05, "faster": "fr_math" }, "accuracy_vs_double": { @@ -255,9 +255,9 @@ "double_reference": "std::exp", "sweep": "65536-pt, [-5, 5]", "speed": { - "fr_math_ns_per_call": 3.1, - "libfixmath_ns_per_call": 67.6, - "fr_math_speedup": 22.02, + "fr_math_ns_per_call": 4.0, + "libfixmath_ns_per_call": 84.6, + "fr_math_speedup": 21.04, "faster": "fr_math" }, "accuracy_vs_double": { @@ -285,9 +285,9 @@ "double_reference": "std::log", "sweep": "65536-pt, [0.01, 100]", "speed": { - "fr_math_ns_per_call": 8.8, - "libfixmath_ns_per_call": 479.3, - "fr_math_speedup": 54.70, + "fr_math_ns_per_call": 11.2, + "libfixmath_ns_per_call": 583.3, + "fr_math_speedup": 51.87, "faster": "fr_math" }, "accuracy_vs_double": { @@ -315,9 +315,9 @@ "double_reference": "std::log2", "sweep": "65536-pt, [0.01, 100]", "speed": { - "fr_math_ns_per_call": 8.7, - "libfixmath_ns_per_call": 39.4, - "fr_math_speedup": 4.55, + "fr_math_ns_per_call": 11.3, + "libfixmath_ns_per_call": 51.1, + "fr_math_speedup": 4.53, "faster": "fr_math" }, "accuracy_vs_double": { @@ -345,8 +345,8 @@ "double_reference": "double a*b", "sweep": "65536-pt, a in [-50,50], b in [-2,2]", "speed": { - "fr_math_ns_per_call": 0.9, - "libfixmath_ns_per_call": 1.2, + "fr_math_ns_per_call": 1.2, + "libfixmath_ns_per_call": 1.6, "fr_math_speedup": 1.33, "faster": "fr_math" }, @@ -375,9 +375,9 @@ "double_reference": "double a/b", "sweep": "65536-pt, a/b in [-50,50]/[0.5,50]", "speed": { - "fr_math_ns_per_call": 0.9, - "libfixmath_ns_per_call": 5.2, - "fr_math_speedup": 5.98, + "fr_math_ns_per_call": 1.2, + "libfixmath_ns_per_call": 6.9, + "fr_math_speedup": 5.96, "faster": "fr_math" }, "accuracy_vs_double": { @@ -406,7 +406,7 @@ "double_reference": "std::hypot", "sweep": "65536-pt, 5 radii x 360 deg", "speed": { - "fr_math_ns_per_call": 20.0 + "fr_math_ns_per_call": 26.2 }, "accuracy_vs_double": { "fr_math": { @@ -425,7 +425,7 @@ "double_reference": "std::hypot", "sweep": "65536-pt, 5 radii x 360 deg", "speed": { - "fr_math_ns_per_call": 2.4 + "fr_math_ns_per_call": 3.2 }, "accuracy_vs_double": { "fr_math": { diff --git a/compare_lfm/comparison_summary.md b/compare_lfm/comparison_summary.md index 9169c50..0f467ad 100644 --- a/compare_lfm/comparison_summary.md +++ b/compare_lfm/comparison_summary.md @@ -23,13 +23,13 @@ All errors measured vs IEEE 754 double. Pct errors skip |ref| < 0.01. | Function | FR max LSB | FR max %% | FR avg %% | lfm max LSB | lfm max %% | lfm avg %% | Winner | |----------|----------:|---------:|---------:|----------:|---------:|---------:|--------| -| sin | 8.8 | 1.0615 | 0.0158 | 507.6 | 74.5513 | 0.6105 | FR | -| cos | 8.2 | 0.9018 | 0.0161 | 508.3 | 74.4001 | 0.6121 | FR | -| tan | 55.7 | 1.0080 | 0.0228 | 1196.0 | 0.7099 | 0.0410 | FR | -| asin | 31.3 | 0.5795 | 0.0134 | 667.1 | 20.1233 | 2.4452 | FR | -| acos | 31.0 | 0.5194 | 0.0056 | 667.8 | 15.3142 | 0.3475 | FR | -| atan | 62.7 | 0.2149 | 0.0061 | 666.3 | 19.8632 | 0.4571 | FR | -| atan2 | 63.6 | 0.4122 | 0.0258 | 666.7 | 20.0045 | 0.9267 | FR | +| sin | 4.9 | 0.4816 | 0.0081 | 507.6 | 74.5513 | 0.6105 | FR | +| cos | 4.4 | 0.3282 | 0.0077 | 508.3 | 74.4001 | 0.6121 | FR | +| tan | 33.2 | 0.5850 | 0.0122 | 1196.0 | 0.7099 | 0.0410 | FR | +| asin | 24.9 | 1.9776 | 0.0477 | 667.1 | 20.1233 | 2.4452 | FR | +| acos | 24.6 | 0.2724 | 0.0093 | 667.8 | 15.3142 | 0.3475 | FR | +| atan | 59.9 | 0.2149 | 0.0061 | 666.3 | 19.8632 | 0.4571 | FR | +| atan2 | 62.5 | 0.4122 | 0.0239 | 666.7 | 20.0045 | 0.9267 | FR | | sqrt | 0.5 | 0.0062 | 0.0001 | 0.5 | 0.0062 | 0.0001 | tie | | exp | 208.3 | 0.1486 | 0.0078 | 216.3 | 0.0756 | 0.0042 | FR | | ln | 3.2 | 0.3012 | 0.0006 | 2.2 | 0.0557 | 0.0002 | lfm | @@ -43,21 +43,21 @@ All errors measured vs IEEE 754 double. Pct errors skip |ref| < 0.01. | Function | FR_math | libfixmath | Speedup | Faster | |----------|--------:|-----------:|--------:|--------| -| sin | 2.6 | 20.7 | 7.94x | FR | -| cos | 4.8 | 18.4 | 3.86x | FR | -| tan | 6.0 | 41.4 | 6.89x | FR | -| asin | 11.5 | 53.7 | 4.67x | FR | -| acos | 8.4 | 50.4 | 5.97x | FR | -| atan | 8.0 | 11.2 | 1.41x | FR | -| atan2 | 15.9 | 10.5 | 0.66x | lfm | -| sqrt | 18.6 | 19.8 | 1.06x | FR | -| exp | 3.1 | 67.6 | 22.02x | FR | -| ln | 8.8 | 479.3 | 54.70x | FR | -| log2 | 8.7 | 39.4 | 4.55x | FR | -| mul | 0.9 | 1.2 | 1.33x | FR | -| div | 0.9 | 5.2 | 5.98x | FR | -| hypot | 20.0 | --- | --- | FR only | -| hypot_fast8 | 2.4 | --- | --- | FR only | +| sin | 7.4 | 12.7 | 1.72x | FR | +| cos | 9.9 | 13.2 | 1.34x | FR | +| tan | 14.1 | 37.2 | 2.64x | FR | +| asin | 11.9 | 64.0 | 5.38x | FR | +| acos | 11.1 | 65.0 | 5.88x | FR | +| atan | 10.8 | 14.8 | 1.36x | FR | +| atan2 | 20.8 | 13.7 | 0.66x | lfm | +| sqrt | 24.8 | 26.2 | 1.05x | FR | +| exp | 4.0 | 84.6 | 21.04x | FR | +| ln | 11.2 | 583.3 | 51.87x | FR | +| log2 | 11.3 | 51.1 | 4.53x | FR | +| mul | 1.2 | 1.6 | 1.33x | FR | +| div | 1.2 | 6.9 | 5.96x | FR | +| hypot | 26.2 | --- | --- | FR only | +| hypot_fast8 | 3.2 | --- | --- | FR only | ### Summary (13 head-to-head functions) diff --git a/dev/FR_math_TODO.md b/dev/FR_math_TODO.md index 84c2336..7d28a71 100644 --- a/dev/FR_math_TODO.md +++ b/dev/FR_math_TODO.md @@ -1,6 +1,8 @@ -# FR_Math TODO -[![Build Status](https://travis-ci.org/deftio/fr_math.svg?branch=master)](https://travis-ci.org/deftio/fr_math) -[![Coverage Status](https://coveralls.io/repos/github/deftio/fr_math/badge.svg?branch=master)](https://coveralls.io/github/deftio/fr_math?branch=master) +# FR_Math TODO + +[![CI](https://github.com/deftio/fr_math/actions/workflows/ci.yml/badge.svg)](https://github.com/deftio/fr_math/actions/workflows/ci.yml) + +*Historical scratchpad — not maintained as a roadmap. For current build/test, see repo root `README.md` and `agents.md`.* (c) M. A. Chatterjee 2000-2016 diff --git a/dev/misc/FR_math.c.checkpoint3 b/dev/misc/FR_math.c.checkpoint3 new file mode 100644 index 0000000..45c75c2 --- /dev/null +++ b/dev/misc/FR_math.c.checkpoint3 @@ -0,0 +1,1705 @@ +/** + * + * @file FR_math.c - c implementation file for basic fixed + * radix math routines + * + * @copy Copyright (C) <2001-2026> + * @author M A Chatterjee + * + * This file contains integer math settable fixed point radix math routines for + * use on systems in which floating point is not desired or unavailable. + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, please place an acknowledgment in the product documentation. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source + * distribution. + * + */ + +#include "FR_math.h" +#include "FR_trig_table.h" + +#ifndef FR_NO_STDINT +#include +#endif + +/*======================================================= + * Full-precision radian/degree → BAM conversion helpers + * + * rad_to_bam_full(r) returns a full s32 BAM value where: + * upper 16 bits = integer BAM (the u16 table index) + * lower 16 bits = sub-BAM fractional part + * Input r must already be normalized to radix 16 and reduced to [-pi, pi]. + * + * The shift terms match FR_RAD2BAM (10 terms, ~21-bit accuracy) but are + * reordered so intermediate sums stay within s32 for |r| <= pi at r16. + */ +static s32 rad_to_bam_full(s32 r) +{ + /* 10 terms: 65536/(2*pi) ≈ 10430.37835... + * 2^13 + 2^11 + 2^7 + 2^6 - 2 + 0.5 - 0.125 + 2^-8 - 2^-11 - 2^-14 + * = 10430.378357 (~21-bit accuracy) + * Terms reordered: interleave negatives early to keep all intermediate + * sums within s32 for |r| <= pi at r16 (max result ≈ 2^31 - 4K). */ + return (r<<13)-(r<<1)+(r<<11)-(r>>3)+(r<<7)+(r<<6)+(r>>1)+(r>>8)-(r>>11)-(r>>14); +} + +/* deg_to_bam_full(d) — same idea for degrees. + * Input d must already be normalized to radix 16 and reduced to [-90, 90). + * Returns full s32 BAM (upper 16 = integer BAM, lower 16 = sub-BAM). + * 7 terms, ~18-bit accuracy matching FR_DEG2BAM. */ +static s32 deg_to_bam_full(s32 d) +{ + return (d<<7)+(d<<6)-(d<<3)-(d<<1)+(d>>5)+(d>>6)-(d>>9); +} + +/* Normalize a fixed-radix value to radix 16. */ +static s32 normalize_to_r16(s32 val, u16 radix) +{ + return (radix > 16) ? (val >> (radix - 16)) + : (radix < 16) ? (val << (16 - radix)) + : val; +} + +/* Reduce non-negative radian (at r16) to [0, 2*pi). + * Helper used by range_reduce_rad and the near-pi small-angle paths. */ +static s32 reduce_to_2pi(s32 r) +{ + const s32 two_pi = FR_TWO_PI(16); /* 411775 */ + if (r > (two_pi << 1)) + r -= (r / two_pi) * two_pi; + else if (r > two_pi) + r -= two_pi; + return r; +} + +/* Range-reduce radian value (at r16, non-negative) to [-pi, pi]. + * Caller guarantees r >= 0 (sign is handled externally). */ +static s32 range_reduce_rad(s32 r) +{ + r = reduce_to_2pi(r); + if (r > FR_PI(16)) + r -= FR_TWO_PI(16); + return r; +} + +/* fr_rad_to_bam — overflow-safe radian to u16 BAM conversion. + * Normalizes to r16, reduces via positive-only path, applies shift-only multiply. + * Handles inputs beyond ±2*pi with modulus (slow path). */ +u16 fr_rad_to_bam(s32 rad, u16 radix) +{ + s32 r = normalize_to_r16(rad, radix); + /* BAM wraps naturally in u16, but range_reduce expects non-negative. + * For negative r: bam(-x) = -bam(x) mod 65536, so negate and let u16 wrap. */ + s32 sign = 1; + if (r < 0) { r = -r; sign = -1; } + r = range_reduce_rad(r); + s32 bam_full = rad_to_bam_full(r); + if (sign < 0) bam_full = -bam_full; + return (u16)((bam_full + (1 << 15)) >> 16); +} + +/* fr_deg_to_bam — overflow-safe degree to u16 BAM conversion. + * Normalizes to r16, reduces to [-90, 90) with quadrant offset. */ +u16 fr_deg_to_bam(s32 deg, u16 radix) +{ + s32 d = normalize_to_r16(deg, radix); + + /* Reduce to [-180, 180) */ + if (d >= FR_D360_R16 || d < -FR_D360_R16) { + s32 n = d / FR_D360_R16; + d -= n * FR_D360_R16; + } + if (d >= FR_D180_R16) d -= FR_D360_R16; + if (d < -FR_D180_R16) d += FR_D360_R16; + + /* Reduce to [-90, 90) with BAM quadrant offset */ + u16 offset = 0; + if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = 32768; } + else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = 32768; } + + return (u16)(offset + (u16)((deg_to_bam_full(d) + (1 << 15)) >> 16)); +} + +/*======================================================= + * BAM-native trig: fr_sin_bam, fr_cos_bam, fr_cos, fr_sin, fr_tan + * + * Internal model: every angle is reduced to a u16 BAM value. The top 2 bits + * select the quadrant, the bottom 14 bits are the in-quadrant position. Odd + * quadrants (1, 3) reverse the in-quadrant index so the table is always read + * in the same direction. + * + * The table is a 129-entry SINE quadrant (ascending: 0 at index 0, 32768 at + * index 128). After mirroring, small full_pos → small output (near zero), + * which enables a cheap small-angle approximation: sin(θ) ≈ θ for angles + * below one table step (~0.7°). This eliminates table quantization error + * in the region where it matters most. + * + * Sign rule: quadrants 2 and 3 negate the result. + * Mirror rule: quadrants 1 and 3 flip the in-quadrant position. + */ +s32 fr_sin_bam(u16 bam) +{ + u32 q = ((u32)bam >> 14) & 0x3; /* top 2 bits = quadrant */ + u32 inq = (u32)bam & (FR_TRIG_QUADRANT - 1); /* bottom 14 bits */ + + /* Exact cardinal angles */ + if (inq == 0) { + if (q == 0 || q == 2) return 0; /* 0° or 180° → 0 */ + if (q == 1) return FR_TRIG_ONE; /* 90° → 1.0 */ + return -FR_TRIG_ONE; /* 270° → -1.0 */ + } + + /* Odd quadrants mirror: read table from the far end */ + if (q == 1 || q == 3) + inq = FR_TRIG_QUADRANT - inq; + + s32 v; + + /* Small-angle approximation: sin(θ) ≈ θ for inq < 128 (one table step). + * θ_rad = inq * (π/2) / 16384. Output = θ * 65536 = inq * FR_kQ2RAD / 16384. + * Max inq=127: 127 * 102944 / 16384 = 798. Error: θ³/6 < 3e-7 << 1 LSB. */ + if (inq < FR_TRIG_FRAC_MAX) { + v = (s32)(((u32)inq * 102944u + 8192u) >> 14); + } else { + /* Table lookup with 7-bit interpolation fraction */ + u32 idx = inq >> FR_TRIG_FRAC_BITS; + u32 frac = inq & FR_TRIG_FRAC_MASK; + s32 lo = (s32)gFR_SIN_TAB_Q[idx]; + s32 hi = (s32)gFR_SIN_TAB_Q[idx + 1]; + v = lo + (((hi - lo) * (s32)frac + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS); + v <<= 1; /* u0.15 → s15.16 */ + } + + return (q >= 2) ? -v : v; +} + +s32 fr_cos_bam(u16 bam) +{ + /* cos(x) = sin(x + pi/2) = sin(bam + 16384). u16 wraparound is free. */ + return fr_sin_bam((u16)(bam + FR_BAM_QUADRANT)); +} + +s32 fr_cos(s32 rad, u16 radix) +{ + if (rad == 0) return FR_TRIG_ONE; + s32 r = normalize_to_r16(rad, radix); + if (r < 0) r = -r; + r = reduce_to_2pi(r); + /* Near π/2 or 3π/2 (cos=0 crossings): cos(π/2+δ) = -sin(δ) ≈ -δ, + * cos(3π/2+δ) = sin(δ) ≈ δ. */ + s32 delta = r - FR_HALF_PI(16); + if (delta >= -256 && delta <= 256) + return -delta; + delta = r - FR_THREE_HALF_PI(16); + if (delta >= -256 && delta <= 256) + return delta; + return fr_cos_bam(fr_rad_to_bam(rad, radix)); +} + +s32 fr_sin(s32 rad, u16 radix) +{ + if (rad == 0) return 0; + s32 r = normalize_to_r16(rad, radix); + s32 sign = 1; + if (r < 0) { r = -r; sign = -1; } + r = reduce_to_2pi(r); + /* Near 0 after reduction: sin(δ) ≈ δ */ + if (r < 256) { + s32 v = r; + return (sign < 0) ? -v : v; + } + /* Near π: sin(π + δ) = -sin(δ) ≈ -δ */ + s32 delta = r - FR_PI(16); + if (delta >= -256 && delta <= 256) { + s32 v = -delta; + return (sign < 0) ? -v : v; + } + /* Near 2π: sin(2π - δ) = -sin(δ) ≈ -δ, but δ = 2π - r */ + delta = FR_TWO_PI(16) - r; + if (delta >= 0 && delta < 256) { + s32 v = -delta; + return (sign < 0) ? -v : v; + } + /* Main path: reduce to [-π, π], convert to u16 BAM, table lookup */ + if (r > FR_PI(16)) r -= FR_TWO_PI(16); + u16 bam = (u16)((rad_to_bam_full(r) + (1 << 15)) >> 16); + s32 v = fr_sin_bam(bam); + return (sign < 0) ? -v : v; +} + +/*======================================================= + * BAM-native tangent: fr_tan_bam + * + * Uses a 65-entry octant table (gFR_TAN_TAB_O) for the first octant + * [0, 45°] and the reciprocal identity tan(x) = 1/tan(90°-x) for the + * second octant (45°, 90°). Result is s15.16 with saturation at the + * poles. + * + * No 64-bit intermediates. One 32-bit division only in the >45° path. + */ +s32 fr_tan_bam(u16 bam) +{ + u32 q = ((u32)bam >> 14) & 0x3; /* quadrant (top 2 bits) */ + u32 inq = (u32)bam & 0x3FFFu; /* in-quadrant (14 bits) */ + s32 sign = 1; + u32 idx, frac; + s32 lo, hi, raw; + + /* Exact zeros: bam lands exactly on 0° or 180° */ + if (inq == 0 && (q == 0 || q == 2)) + return 0; + + /* Poles: bam lands exactly on 90° or 270° */ + if (inq == 0 && (q == 1 || q == 3)) + return (q == 1) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + + /* Q1 (90°..180°) and Q3 (270°..360°): reflect and negate */ + if (q == 1 || q == 3) { + inq = 0x4000u - inq; + sign = -1; + } + + /* Now inq is in (0, 0x4000) = (0°, 90°) exclusive. + * Split into first octant [0, 45°) and second octant [45°, 90°). */ + if (inq < FR_TAN_OCTANT) { + /* First octant: direct table lookup + lerp. + * inq is 13 bits; top FR_TAN_TABLE_BITS index the table, + * bottom FR_TAN_FRAC_BITS drive interpolation. */ + idx = inq >> FR_TAN_FRAC_BITS; + frac = inq & FR_TAN_FRAC_MASK; + lo = (s32)gFR_TAN_TAB_O[idx]; + hi = (s32)gFR_TAN_TAB_O[idx + 1]; + raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + + if (raw < 0x40) { + /* Near zero: redo interpolation with 4 extra bits of + * precision to reduce rounding error when result is small. */ + s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; + s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; + raw = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + raw = (raw + 4) >> 3; /* u0.19 → s15.16 with rounding */ + } else { + raw <<= 1; /* u0.15 → s15.16 */ + } + } else { + /* Second octant: tan(x) = 1 / tan(90° - x). + * complement is in (0, 0x2000] = (0°, 45°]. */ + u32 comp = 0x4000u - inq; + + /* Look up tan(complement) from the table */ + idx = comp >> FR_TAN_FRAC_BITS; + frac = comp & FR_TAN_FRAC_MASK; + lo = (s32)gFR_TAN_TAB_O[idx]; + hi = (s32)gFR_TAN_TAB_O[idx + 1]; + raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + + if (raw < 0x40) { + /* Near pole: redo interpolation with 4 extra bits of + * precision. The reciprocal amplifies small interpolation + * errors, so extra precision significantly helps here. + * Result: (2^31 / raw_hp) << 4 = 2^35 / raw_hp. */ + s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; + s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; + s32 raw_hp = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + if (raw_hp < 32) { + raw = FR_TRIG_MAXVAL; + } else { + raw = (s32)((0x80000000u / (u32)raw_hp) << 4); + } + } else { + raw = (s32)(0x80000000u / (u32)raw); + } + } + + return (sign < 0) ? -raw : raw; +} + +/* fr_tan — radian-input tangent with full sub-BAM precision. + * + * Goes directly to the 65-entry octant tangent table with 16-bit + * interpolation precision. Sign from quadrant, magnitude from table. + * No s64 intermediates. One 32-bit division in the second-octant path. + * + * Architecture: + * 1. Sign: determined by quadrant of the BAM position (Q1/Q3=+, Q2/Q4=-) + * 2. Magnitude: from octant table lookup + reciprocal identity + * - First octant [0,45°): direct table lerp + * - Second octant [45°,90°): 1/tan(90°-x) via reciprocal + * 3. Return sign * magnitude */ + +/* Internal: given a full s32 BAM, compute |tan| directly from the table. + * Returns the unsigned magnitude (always >= 0). */ +static s32 tan_mag_from_bam_full(s32 bam_full) +{ + u16 bam0 = (u16)(bam_full >> 16); + u32 frac_sub = (u32)bam_full & 0xFFFFu; + + u32 q = ((u32)bam0 >> 14) & 0x3u; + u32 inq = (u32)bam0 & 0x3FFFu; + + /* Exact zeros: tan(0°) = tan(180°) = 0 */ + if (inq == 0 && frac_sub == 0 && (q == 0 || q == 2)) + return 0; + + /* Exact poles: tan(90°) = tan(270°) → saturate */ + if (inq == 0 && frac_sub == 0 && (q == 1 || q == 3)) + return FR_TRIG_MAXVAL; + + /* Mirror odd quadrants (Q1, Q3) into the [0, 90°) range. + * After this, full_pos represents distance from the nearest zero. */ + u32 full_pos; + if (q == 1 || q == 3) + full_pos = ((u32)(0x4000u - inq) << 16) - frac_sub; + else + full_pos = ((u32)inq << 16) + frac_sub; + + /* Split at octant boundary (45° = 8192 BAM = 8192*65536 sub-BAM) */ + s32 raw; + if (full_pos < ((u32)FR_TAN_OCTANT << 16)) { + /* First octant [0, 45°): direct table lookup. + * 64 table intervals, each 2^23 sub-BAM units wide. */ + u32 idx = full_pos >> 23; + u32 frac16 = (full_pos >> 7) & 0xFFFFu; + + s32 lo = (s32)gFR_TAN_TAB_O[idx]; + s32 hi = (s32)gFR_TAN_TAB_O[idx + 1]; + raw = lo + (s32)(((s32)(hi - lo) * (s32)frac16 + (1 << 15)) >> 16); + + if (raw < 0x40) { + /* Near zero: redo with 4 extra bits of precision */ + s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; + s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; + raw = lo4 + (s32)(((s32)(hi4 - lo4) * (s32)frac16 + (1 << 15)) >> 16); + raw = (raw + 4) >> 3; /* u0.19 → s15.16 with rounding */ + } else { + raw <<= 1; /* u0.15 → s15.16 */ + } + } else { + /* Second octant [45°, 90°): tan(x) = 1 / tan(90° - x). + * Complement = distance from pole, in first-octant range. */ + u32 comp = ((u32)FR_TRIG_QUADRANT << 16) - full_pos; + + u32 idx = comp >> 23; + u32 frac16 = (comp >> 7) & 0xFFFFu; + + s32 lo = (s32)gFR_TAN_TAB_O[idx]; + s32 hi = (s32)gFR_TAN_TAB_O[idx + 1]; + raw = lo + (s32)(((s32)(hi - lo) * (s32)frac16 + (1 << 15)) >> 16); + + if (raw < 0x40) { + /* Near pole: redo with 4 extra bits then reciprocal */ + s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; + s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; + s32 raw_hp = lo4 + (s32)(((s32)(hi4 - lo4) * (s32)frac16 + (1 << 15)) >> 16); + if (raw_hp < 32) + raw = FR_TRIG_MAXVAL; + else + raw = (s32)((0x80000000u / (u32)raw_hp) << 4); + } else { + raw = (s32)(0x80000000u / (u32)raw); + } + } + return raw; +} + +s32 fr_tan(s32 rad, u16 radix) +{ + if (rad == 0) return 0; + /* tan(-x) = -tan(x): factor out sign, reduce positive */ + s32 r = normalize_to_r16(rad, radix); + s32 tan_sign = 1; + if (r < 0) { r = -r; tan_sign = -1; } + r = reduce_to_2pi(r); + /* Near-π small angle: tan(π + δ) = tan(δ) ≈ δ. */ + s32 delta = r - FR_PI(16); + if (delta >= -256 && delta <= 256) { + return (tan_sign < 0) ? -delta : delta; + } + /* Full pipeline */ + if (r > FR_PI(16)) + r -= FR_TWO_PI(16); + s32 bam_full = rad_to_bam_full(r); + + /* Sign from quadrant of the BAM position */ + u32 q = ((u32)((u16)(bam_full >> 16)) >> 14) & 0x3u; + s32 sign = (q == 1 || q == 3) ? -tan_sign : tan_sign; + + s32 mag = tan_mag_from_bam_full(bam_full); + return (sign < 0) ? -mag : mag; +} + +/*======================================================= + * Degree-input trig: convert to u16 BAM via fr_deg_to_bam, then + * call the BAM-native functions. Cardinal angles are exact. + */ + +s32 fr_cos_deg(s32 deg, u16 radix) +{ + if (radix == 0) return fr_cos_bam(FR_DEG2BAM_I(deg)); + if (deg < 0) deg = -deg; + /* Exact cardinal angles */ + s32 frac_mask = (1 << radix) - 1; + if ((deg & frac_mask) == 0) { + s32 rem = (deg >> radix) % 360; + if (rem == 0) return FR_TRIG_ONE; + if (rem == 90) return 0; + if (rem == 180) return -FR_TRIG_ONE; + if (rem == 270) return 0; + } + /* Near 90° or 270° (cos=0 crossings): cos(90+δ) = -sin(δ) ≈ -δ·π/180, + * cos(270+δ) = sin(δ) ≈ δ·π/180. Avoids BAM rounding error at zero. */ + s32 d = normalize_to_r16(deg, radix); + if (d >= FR_D360_R16) { s32 n = d / FR_D360_R16; d -= n * FR_D360_R16; } + { + const s32 DEG_THRESH = 14000; /* ~0.21° at r16 */ + s32 delta = d - FR_D90_R16; + if (delta >= -DEG_THRESH && delta <= DEG_THRESH) { + s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16); + return -dr; + } + delta = d - (FR_D90_R16 + FR_D180_R16); + if (delta >= -DEG_THRESH && delta <= DEG_THRESH) { + s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16); + return dr; + } + } + return fr_cos_bam(fr_deg_to_bam(deg, radix)); +} + +s32 fr_sin_deg(s32 deg, u16 radix) +{ + if (radix == 0) return fr_sin_bam(FR_DEG2BAM_I(deg)); + s32 sign = 1; + if (deg < 0) { deg = -deg; sign = -1; } + /* Exact cardinal angles */ + s32 frac_mask = (1 << radix) - 1; + if ((deg & frac_mask) == 0) { + s32 rem = (deg >> radix) % 360; + if (rem == 0) return 0; + if (rem == 90) return (sign < 0) ? -FR_TRIG_ONE : FR_TRIG_ONE; + if (rem == 180) return 0; + if (rem == 270) return (sign < 0) ? FR_TRIG_ONE : -FR_TRIG_ONE; + } + s32 v = fr_sin_bam(fr_deg_to_bam(deg, radix)); + return (sign < 0) ? -v : v; +} + +s32 FR_TanI(s32 deg) +{ + /* Exact pole: deg mod 180 == ±90. Sign matches input sign. */ + s32 rem = deg % 180; + if (rem == 90 || rem == -90) + return (deg > 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + return fr_tan_bam(FR_DEG2BAM_I(deg)); +} + +/* Internal: range-reduce degrees and produce full s32 BAM (used by fr_tan_deg). */ +static s32 range_reduce_deg_bam_full(s32 deg, u16 radix) +{ + s32 d = normalize_to_r16(deg, radix); + if (d >= FR_D360_R16) { + s32 n = d / FR_D360_R16; + d -= n * FR_D360_R16; + } + if (d >= FR_D180_R16) d -= FR_D360_R16; + s32 offset = 0; + if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = (s32)0x80000000u; } + else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = (s32)0x80000000u; } + return offset + deg_to_bam_full(d); +} + +s32 fr_tan_deg(s32 deg, u16 radix) +{ + if (radix == 0) return FR_TanI(deg); + /* tan(-x) = -tan(x): factor out sign, reduce positive */ + s32 tan_sign = 1; + if (deg < 0) { deg = -deg; tan_sign = -1; } + /* Exact cardinal angles: tan is exactly 0 or ±MAXVAL */ + s32 frac_mask = (1 << radix) - 1; + if ((deg & frac_mask) == 0) { + s32 deg_int = deg >> radix; + s32 rem = deg_int % 180; + if (rem == 0) return 0; + if (rem == 90) return tan_sign > 0 ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + } + s32 bam_full = range_reduce_deg_bam_full(deg, radix); + u32 q = ((u32)((u16)(bam_full >> 16)) >> 14) & 0x3u; + s32 sign = (q == 1 || q == 3) ? -tan_sign : tan_sign; + s32 mag = tan_mag_from_bam_full(bam_full); + return (sign < 0) ? -mag : mag; +} + +/*======================================================= + * FR_FixMuls (x*y signed, NOT saturated, round-to-nearest) + * + * Treats x and y as fixed-point values at the same radix r and returns + * (x*y) >> r at radix r. The user is responsible for tracking the radix + * point and for guaranteeing the product fits in 32 bits. + * + * Adds 0.5 LSB (0x8000) before the shift so the result rounds to + * nearest instead of truncating toward zero. + */ +s32 FR_FixMuls(s32 x, s32 y) +{ + int64_t v = (int64_t)x * (int64_t)y; + return (s32)((v + 0x8000) >> 16); +} + +/*======================================================= + * FR_FixMulSat (x*y signed, SATURATED, round-to-nearest) + * + * Same semantics as FR_FixMuls but clamps to [INT32_MIN, INT32_MAX] on + * overflow instead of wrapping. The fixed-point radix is fixed at 16 bits + * (sM.16 inputs and output). Rounds to nearest (adds 0.5 LSB before shift). + */ +s32 FR_FixMulSat(s32 x, s32 y) +{ + int64_t v = ((int64_t)x * (int64_t)y + 0x8000) >> 16; + if (v > (int64_t)0x7fffffff) return FR_OVERFLOW_POS; + if (v < -(int64_t)0x80000000) return FR_OVERFLOW_NEG; + return (s32)v; +} + +/*======================================================= + FR_FixAddSat (x+y saturated add) + programmer must align radix points before using this function + */ +s32 FR_FixAddSat(s32 x, s32 y) +{ + s32 sum = x + y; + if (x < 0) + { + if (y < 0) + return (sum >= 0) ? FR_OVERFLOW_NEG : sum; + } + else + { + if (y >= 0) + return (sum <= 0) ? FR_OVERFLOW_POS : sum; + } + return sum; +} + +/* Inverse Trig + * acos with binary search of the BAM-native quadrant table. + * + * Algorithm: bring `input` into s0.15, then binary-search the first-quadrant + * cos table for the table entry closest to |input|. Apply quadrant mirror + * if input was negative. + */ +/* FR_acos — returns radians at out_radix. + * Range: [0, pi]. Input is a cosine value at the given radix. + * + * Uses the 129-entry sine table in reverse: binary-search the ascending + * table to find asin(|input|), then acos = pi/2 - asin (with sign handling + * for the second quadrant). + */ +s32 FR_acos(s32 input, u16 radix, u16 out_radix) +{ + s32 v; + s16 sign; + s32 lo, hi, mid; + s32 idx, d, num, frac; + s32 input_abs; + + /* Work with absolute value at the caller's radix */ + sign = (s16)((input < 0) ? 1 : 0); + input_abs = sign ? -input : input; + + /* Clamp at the caller's radix */ + { + s32 one = (s32)1 << radix; + if (input_abs >= one) + return sign ? FR_CHRDX(FR_kPI, FR_kPREC, out_radix) : 0; + } + + v = FR_CHRDX(input_abs, radix, FR_TRIG_PREC); /* |input| at s0.15 */ + + /* Small-angle fast path: when cos(θ) is close to 1.0, the sine table + * has poor resolution near the top (entries close together). + * Use acos(x) ≈ sqrt(2*(1-x)) instead. Threshold: v > sin_tab[121] + * means the input is > cos(7*π/256) ≈ 0.9975. */ + if (v > gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE - 8]) + { + s32 one = (s32)1 << radix; + s32 one_minus_x = one - input_abs; /* 1-|x| at caller radix */ + s32 two_omx = one_minus_x << 1; /* 2(1-|x|) at caller radix */ + s32 rad_native = FR_sqrt(two_omx, radix); /* radians at caller radix */ + s32 rad_out = FR_CHRDX(rad_native, radix, out_radix); + if (sign) + rad_out = FR_CHRDX(FR_kPI, FR_kPREC, out_radix) - rad_out; + return rad_out; + } + + /* Binary search on the ascending sine table. + * gFR_SIN_TAB_Q[0] = 0 (sin 0°), gFR_SIN_TAB_Q[128] = 32768 (sin 90°). + * + * Find the first index where table[idx] >= v. */ + lo = 0; + hi = FR_TRIG_TABLE_SIZE; + while (lo < hi) + { + mid = (lo + hi) >> 1; + if ((s32)gFR_SIN_TAB_Q[mid] < v) + lo = mid + 1; + else + hi = mid; + } + + /* lo is now the first index where table[lo] >= v. + * The bracketing interval is [lo-1, lo] with table[lo-1] < v <= table[lo]. + * This gives us the asin angle; acos = pi/2 - asin. */ + idx = lo; + if (idx <= 0) + { + idx = 0; + frac = 0; + } + else if (idx >= FR_TRIG_TABLE_SIZE) + { + idx = FR_TRIG_TABLE_SIZE - 1; + frac = 0; + } + else + { + /* Interpolate between table[idx-1] and table[idx]. + * d = table[idx] - table[idx-1] (>= 0, sin increasing) + * num = v - table[idx-1] (how far past table[idx-1]) + */ + d = (s32)gFR_SIN_TAB_Q[idx] - (s32)gFR_SIN_TAB_Q[idx - 1]; + num = v - (s32)gFR_SIN_TAB_Q[idx - 1]; + if (d > 0) + frac = ((num << FR_TRIG_FRAC_BITS) + (d >> 1)) / d; + else + frac = 0; + idx = idx - 1; + } + + { + /* asin_bam is the angle in first-quadrant BAM whose sin = v */ + u16 asin_bam = (u16)(((u32)idx << FR_TRIG_FRAC_BITS) + (u32)frac); + /* acos = pi/2 - asin (in BAM: quadrant - asin_bam) */ + u16 bam = (u16)(FR_TRIG_QUADRANT - asin_bam); + if (sign) + bam = (u16)(FR_BAM_HALF - bam); /* mirror: pi - angle */ + return FR_CHRDX(FR_Q2RAD(bam), 14, out_radix); + } +} + +/* FR_asin — returns radians at out_radix. Range: [-pi/2, pi/2]. */ +s32 FR_asin(s32 input, u16 radix, u16 out_radix) +{ + /* asin(x) = pi/2 - acos(x) */ + s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); + return half_pi - FR_acos(input, radix, out_radix); +} + +/* FR_atan2(y, x, out_radix) — full-circle arctangent, returns radians + * at the specified output radix (s32). + * + * Range: [-pi, pi]. Returns 0 for atan2(0,0). + * + * Implementation: normalise (x,y) via FR_hypot_fast8, then recover the + * angle with FR_asin or FR_acos (both use the 129-entry cosine table). + * To stay in the well-conditioned region of each inverse function we + * switch at 45°: + * |y| <= |x| → use asin(y/h) — asin stable near 0 + * |y| > |x| → use acos(x/h) — acos stable near pi/2 + * This keeps the derivative amplification factor below 1.414x everywhere. + */ +s32 FR_atan2(s32 y, s32 x, u16 out_radix) +{ + s32 ax, ay, h, q1_angle; + + /* Axis cases — exact angles, no divide. */ + if (x == 0) + { + if (y > 0) return FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); /* pi/2 */ + if (y < 0) return -FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); /* -pi/2 */ + return 0; + } + if (y == 0) + return (x > 0) ? 0 : FR_CHRDX(FR_kPI, FR_kPREC, out_radix); /* 0 or pi */ + + ax = (x < 0) ? -x : x; + ay = (y < 0) ? -y : y; + + /* Normalise so max(ax,ay) sits in [2^14, 2^15). This gives + * FR_hypot_fast8 enough integer bits for the shift-only segments + * to produce an accurate ratio — critical when the raw inputs are + * small (e.g. atan2(1,1) at radix 0). Scaling both by the same + * power of two doesn't change the angle. */ + { + s32 mx = (ax > ay) ? ax : ay; + while (mx < (1L << 14)) { ax <<= 1; ay <<= 1; mx <<= 1; } + while (mx >= (1L << 16)) { ax >>= 1; ay >>= 1; mx >>= 1; } + } + + h = FR_hypot_fast8((s32)ax, (s32)ay); + if (h == 0) return 0; /* degenerate */ + + /* Compute the first-quadrant angle (positive, [0..pi/2]). + * Divide produces a value in [0,1] at radix FR_TRIG_PREC (s0.15). + * + * Small-angle fast path: when the minor-axis ratio is small, + * asin(x) ≈ x (error < x³/6). Below ~5° the cubic term is + * smaller than the table-lookup error, so the direct identity + * is both faster and more accurate. Threshold 2753 at r15 + * corresponds to sin(~4.8°) = 0.084. */ + #define FR_ATAN2_SMALL 2753 + if (ay <= ax) + { + /* angle in [0°..45°]: use asin(ay/h) — well-conditioned near 0 */ + s32 sin_val = (s32)(((int64_t)ay << FR_TRIG_PREC) / h); + if (sin_val < FR_ATAN2_SMALL) + q1_angle = FR_CHRDX(sin_val, FR_TRIG_PREC, out_radix); + else + q1_angle = FR_asin(sin_val, FR_TRIG_PREC, out_radix); + } + else + { + /* angle in [45°..90°]: use acos(ax/h) — well-conditioned near pi/2 */ + s32 cos_val = (s32)(((int64_t)ax << FR_TRIG_PREC) / h); + if (cos_val < FR_ATAN2_SMALL) + { + /* angle ≈ pi/2 - cos_val (symmetric small-angle identity) */ + s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); + q1_angle = half_pi - FR_CHRDX(cos_val, FR_TRIG_PREC, out_radix); + } + else + q1_angle = FR_acos(cos_val, FR_TRIG_PREC, out_radix); + } + + /* Apply quadrant from signs of x and y. + * q1_angle is always positive [0..pi/2]. */ + { + s32 pi = FR_CHRDX(FR_kPI, FR_kPREC, out_radix); + if (x > 0) + return (y > 0) ? q1_angle : -q1_angle; + /* x < 0: mirror across y-axis */ + return (y > 0) ? (pi - q1_angle) : (q1_angle - pi); + } +} + +/* FR_atan(input, radix, out_radix) — arctangent of a single argument. + * Returns radians at out_radix, range [-pi/2, pi/2]. + */ +s32 FR_atan(s32 input, u16 radix, u16 out_radix) +{ + s32 one = (s32)1 << radix; + return FR_atan2(input, one, out_radix); +} + +/* 2^f table for f in [0, 1] in 65 entries (64 segments), output in s.16 + * fixed point. Entry i = round(2^(i/64) * 65536). Size: 260 bytes. + * Used by FR_pow2 to look up the fractional power of 2 with linear + * interpolation. + */ +static const u32 gFR_POW2_FRAC_TAB[65] = { + 65536, 66250, 66971, 67700, 68438, 69183, 69936, 70698, + 71468, 72246, 73032, 73828, 74632, 75444, 76266, 77096, + 77936, 78785, 79642, 80510, 81386, 82273, 83169, 84074, + 84990, 85915, 86851, 87796, 88752, 89719, 90696, 91684, + 92682, 93691, 94711, 95743, 96785, 97839, 98905, 99982, + 101070, 102171, 103283, 104408, 105545, 106694, 107856, 109031, + 110218, 111418, 112631, 113858, 115098, 116351, 117618, 118899, + 120194, 121502, 122825, 124163, 125515, 126882, 128263, 129660, + 131072 +}; + +/* FR_pow2(input, radix) — computes 2^(input/2^radix), result at same radix. + * + * Algorithm: split input into floor(integer) and fractional part. The + * fractional part is in [0, 1) by construction (Euclidean / mathematical + * floor — the fractional part of -2.3 is +0.7, not -0.3). Then + * 2^(int + frac) = 2^int * 2^frac + * where 2^frac is looked up from a 65-entry table at radix 16, and 2^int + * is a shift. + * + * Worst-case absolute error: ~1e-5 over [-8, 8] (65-entry table). + * Linear interpolation leaves a small concavity error in each interval. + */ +s32 FR_pow2(s32 input, u16 radix) +{ + s32 flr, frac_full, idx, frac_lo, lo, hi, mant, result; + u32 mask = (radix > 0) ? (((u32)1 << radix) - 1) : 0; + + /* Mathematical floor: for positive input it's input>>radix; for + * negative input we need to round toward -infinity, not toward zero. + */ + if (input >= 0) + { + flr = (s32)((u32)input >> radix); + frac_full = (s32)((u32)input & mask); + } + else + { + s32 neg = -input; + s32 nflr = (s32)((u32)neg >> radix); + s32 nfrc = (s32)((u32)neg & mask); + if (nfrc == 0) + { + flr = -nflr; + frac_full = 0; + } + else + { + flr = -nflr - 1; /* floor toward -inf */ + frac_full = (s32)((1L << radix) - nfrc); + } + } + + /* frac_full is in [0, 2^radix). Re-radix it to s.16 for table lookup. */ + if (radix > 16) + frac_full >>= (radix - 16); + else if (radix < 16) + frac_full <<= (16 - radix); + /* now frac_full is in [0, 65536) representing fractional in s.16. */ + + /* Top 6 bits index the table; bottom 10 are the interpolation fraction. */ + idx = frac_full >> 10; + frac_lo = frac_full & ((1L << 10) - 1); + lo = (s32)gFR_POW2_FRAC_TAB[idx]; + hi = (s32)gFR_POW2_FRAC_TAB[idx + 1]; + mant = lo + (((hi - lo) * frac_lo) >> 10); /* mant in s.16, in [1.0, 2.0) */ + + /* Apply integer shift. mant is at radix 16. We want output at `radix`. + * If radix == 16: just shift mant. + * Otherwise re-radix mant first. + */ + if (flr >= 0) + { + /* result = mant << flr, then re-radix to caller's radix. */ + if (flr >= 30) + return FR_OVERFLOW_POS; + result = mant << flr; + return FR_CHRDX(result, 16, radix); + } + else + { + /* mant >> -flr at radix 16, then re-radix. */ + s32 sh = -flr; + if (sh >= 30) + return 0; /* underflow */ + result = mant >> sh; + return FR_CHRDX(result, 16, radix); + } +} + +/* log2 mantissa table for m in [1, 2), m = 1 + i/64, returning log2(m) + * in s.16 fixed point. 65 entries (last is log2(2) = 1.0 = 65536) so the + * interpolation between idx and idx+1 never reads out of bounds. + * Size: 260 bytes. Entry i = round(log2(1 + i/64) * 65536). + */ +static const u32 gFR_LOG2_MANT_TAB[65] = { + 0, 1466, 2909, 4331, 5732, 7112, 8473, 9814, + 11136, 12440, 13727, 14996, 16248, 17484, 18704, 19909, + 21098, 22272, 23433, 24579, 25711, 26830, 27936, 29029, + 30109, 31178, 32234, 33279, 34312, 35334, 36346, 37346, + 38336, 39316, 40286, 41246, 42196, 43137, 44068, 44990, + 45904, 46809, 47705, 48593, 49472, 50344, 51207, 52063, + 52911, 53751, 54584, 55410, 56229, 57040, 57845, 58643, + 59434, 60219, 60997, 61769, 62534, 63294, 64047, 64794, + 65536 +}; + +/* FR_log2(input, radix, output_radix) — log base 2 of a fixed-point number. + * + * input : value to take log2 of, treated as a positive sM.radix value. + * radix : number of fractional bits in `input`. + * output_radix : number of fractional bits in the result. + * + * Returns FR_LOG2MIN for input <= 0 (log of zero/negative is undefined; we + * return a large negative sentinel rather than crash). + * + * Algorithm: + * 1. Find p, the position of the leading 1 bit of `input`. + * log2(input) = p + log2(input / 2^p), where the second term is in + * [0, 1) because (input / 2^p) is in [1, 2). + * 2. Normalize the mantissa to s1.31 by shifting `input` so its top bit + * sits at bit 31 (so bits 30..25 are the upper 6 bits of m-1). + * 3. Look up log2(m) in the 65-entry table with linear interpolation + * across the next 24 bits. Result is in s.16. + * 4. integer_part = (p - radix), then result = (integer_part << 16) + + * mantissa_log2. + * 5. Re-radix to the requested output_radix via FR_CHRDX. + * + * Worst-case absolute error: ~6e-5 in log2 units (65-entry table). + */ +s32 FR_log2(s32 input, u16 radix, u16 output_radix) +{ + s32 p, integer_part, idx, frac, lo, hi, mant_log2, result; + u32 m, u; + + if (input <= 0) + return FR_LOG2MIN; + + /* Step 1: find the position of the leading 1 bit. */ + u = (u32)input; + p = 0; + while (u > 1) + { + u >>= 1; + p++; + } + + /* Step 2: shift input so the leading 1 bit is at bit 30 (s1.30 mantissa). + * Equivalently: m = input << (30 - p), where m is in [2^30, 2^31). + * The fractional part of m / 2^30 is in [0, 1), and that's what we look + * up in the table. + */ + if (p >= 30) + m = (u32)input >> (p - 30); + else + m = (u32)input << (30 - p); + + /* m is now in [2^30, 2^31). Subtract 2^30 to get the fractional part + * (m_frac in [0, 2^30)). Index into the 64-entry table is the top 6 + * bits of m_frac; the lower 24 bits are the interpolation fraction. + */ + m -= (1u << 30); + idx = (s32)(m >> 24); /* 6 bits */ + frac = (s32)(m & ((1u << 24) - 1)); /* 24 bits */ + lo = (s32)gFR_LOG2_MANT_TAB[idx]; + hi = (s32)gFR_LOG2_MANT_TAB[idx + 1]; + mant_log2 = lo + (s32)(((int64_t)(hi - lo) * frac) >> 24); + + /* Step 3: assemble. integer_part = p - radix. */ + integer_part = p - (s32)radix; + result = (integer_part << 16) + mant_log2; + + /* Step 4: re-radix to output_radix. */ + return FR_CHRDX(result, 16, output_radix); +} + +s32 FR_ln(s32 input, u16 radix, u16 output_radix) +{ + s32 r = FR_log2(input, radix, output_radix); + return FR_MULK28(r, FR_krLOG2E_28); +} + +s32 FR_log10(s32 input, u16 radix, u16 output_radix) +{ + s32 r = FR_log2(input, radix, output_radix); + return FR_MULK28(r, FR_krLOG2_10_28); +} + +#ifndef FR_NO_PRINT +/*************************************** + * FR_printNumD - write a decimal integer with space padding. + * + * Equivalent to "%*d" in printf, modulo the return convention. + * + * f : per-character output function (e.g. putchar). Must not be NULL. + * n : signed integer to print. + * pad : minimum field width; spaces are prepended to reach this width. + * + * Returns the number of characters written on success, or -1 if `f` is NULL. + */ +int FR_printNumD(int (*f)(char), int n, int pad) +{ + unsigned int mag; + int written = 0, neg = 0; + int digits = 1; + unsigned int t; + + if (!f) + return -1; + + if (n < 0) + { + neg = 1; + mag = (unsigned int)(-(long)n); /* safe for INT_MIN */ + } + else + { + mag = (unsigned int)n; + } + + /* Count decimal digits in mag (always at least 1 for n=0). */ + t = mag; + while (t >= 10) + { + t /= 10; + digits++; + } + + /* Pad with spaces. The total width includes the sign. */ + { + int total = digits + (neg ? 1 : 0); + while (pad-- > total) + { + f(' '); + written++; + } + } + + if (neg) + { + f('-'); + written++; + } + + /* Print digits MSB first by computing the largest power of 10 <= mag. */ + { + unsigned int p = 1; + int i; + for (i = 1; i < digits; i++) + p *= 10; + while (p > 0) + { + f((char)('0' + (mag / p) % 10)); + written++; + if (p == 1) + break; + p /= 10; + } + } + + return written; +} + +/*************************************** + * FR_printNumF - write a fixed-point number as a decimal floating-point string. + * + * f : per-character output function. Must not be NULL. + * n : signed fixed-point value at the given radix. + * radix : number of fractional bits in `n`. + * pad : minimum field width (including sign and decimal point). + * prec : number of fractional digits to print. + * + * Returns the number of characters written on success, -1 if `f` is NULL. + * + * Rounding policy: truncates fractional digits beyond `prec` (no rounding). + */ +int FR_printNumF(int (*f)(char), s32 n, int radix, int pad, int prec) +{ + unsigned int mag_int; + u32 mag_frac; + u32 frac_mask; + int written = 0, neg = 0; + int int_digits = 1; + int total; + unsigned int t; + + if (!f) + return -1; + + frac_mask = (radix > 0) ? (((u32)1 << radix) - 1) : 0; + + if (n < 0) + { + neg = 1; + /* Negate as unsigned to avoid INT_MIN overflow. */ + u32 un = (u32)(-(int64_t)n); + mag_int = (unsigned int)(un >> radix); + mag_frac = un & frac_mask; + } + else + { + mag_int = (unsigned int)((u32)n >> radix); + mag_frac = (u32)n & frac_mask; + } + + /* Count integer digits. */ + t = mag_int; + while (t >= 10) + { + t /= 10; + int_digits++; + } + + /* Total visible width = sign + int + (dot + prec digits if prec>0). */ + total = int_digits + (neg ? 1 : 0) + ((prec > 0) ? (1 + prec) : 0); + while (pad-- > total) + { + f(' '); + written++; + } + + if (neg) + { + f('-'); + written++; + } + + /* Print integer part. */ + { + unsigned int p = 1; + int i; + for (i = 1; i < int_digits; i++) + p *= 10; + while (p > 0) + { + f((char)('0' + (mag_int / p) % 10)); + written++; + if (p == 1) + break; + p /= 10; + } + } + + /* Print fractional part. Extract one decimal digit at a time: + * frac' = frac * 10 + * digit = frac' >> radix + * frac = frac' & frac_mask + */ + if (prec > 0) + { + f('.'); + written++; + while (prec-- > 0) + { + u32 scaled; + int digit; + scaled = (u32)(((uint64_t)mag_frac * 10)); + digit = (int)(scaled >> radix); + mag_frac = scaled & frac_mask; + f((char)('0' + (digit % 10))); + written++; + } + } + + return written; +} + +/*************************************** + * FR_printNumH - write an integer as hexadecimal. + * + * f : per-character output function. Must not be NULL. + * n : integer to print (interpreted as unsigned for the digits). + * showPrefix : if non-zero, prepend "0x". + * + * Returns the number of characters written on success, -1 if f is NULL. + */ +int FR_printNumH(int (*f)(char), int n, int showPrefix) +{ + unsigned int u = (unsigned int)n; + int written = 0; + int x = (int)((sizeof(int) << 1) - 1); + int d; + + if (!f) + return -1; + + if (showPrefix) + { + f('0'); + f('x'); + written += 2; + } + + do + { + d = (int)((u >> (x << 2)) & 0xf); + d = (d > 9) ? (d - 0xa + 'a') : (d + '0'); + f((char)d); + written++; + } while (x--); + + return written; +} + +/*======================================================= + * FR_numstr — parse a decimal string into a fixed-point value. + * + * This is the runtime inverse of FR_printNumF: given a string like + * "12.34" or "-0.05" and a radix (number of fractional bits), it + * returns the s32 fixed-point representation. + * + * Features: + * - Leading whitespace is skipped. + * - Optional sign ('+' or '-'). + * - Up to 9 fractional digits are used (s32 range). + * - No malloc, no strtod, no libm. + * + * Returns 0 for NULL or empty input. + */ +s32 FR_numstr(const char *s, u16 radix) +{ + static const s32 pow10[10] = { + 1L, 10L, 100L, 1000L, 10000L, + 100000L, 1000000L, 10000000L, 100000000L, 1000000000L + }; + s32 int_part = 0, frac_part = 0; + int frac_digits = 0, neg = 0; + s32 result; + + if (!s || !*s) return 0; + + while (*s == ' ' || *s == '\t') s++; /* skip whitespace */ + if (*s == '-') { neg = 1; s++; } /* sign */ + else if (*s == '+') { s++; } + + while (*s >= '0' && *s <= '9') /* integer part */ + { int_part = int_part * 10 + (*s - '0'); s++; } + + if (*s == '.') { /* fractional part */ + s++; + while (*s >= '0' && *s <= '9') { + if (frac_digits < 9) + { frac_part = frac_part * 10 + (*s - '0'); frac_digits++; } + s++; + } + } + + result = int_part << radix; + if (frac_digits > 0) + result += (s32)(((int64_t)frac_part << radix) / pow10[frac_digits]); + + return neg ? -result : result; +} +#endif /* FR_NO_PRINT */ + +/*======================================================= + * Square root and hypot + * + * fr_isqrt64 is a private helper implementing the digit-by-digit + * ("shift-and-subtract") integer square root. The core loop computes + * floor(sqrt(n)), then a final remainder check rounds to nearest. + * Uses no division. At most 32 iterations. + */ +static u32 fr_isqrt64(uint64_t n) +{ + uint64_t root = 0; + uint64_t bit = (uint64_t)1 << 62; + while (bit > n) bit >>= 2; + while (bit != 0) + { + uint64_t trial = root + bit; + if (n >= trial) + { + n -= trial; + root = (root >> 1) + bit; + } + else + { + root >>= 1; + } + bit >>= 2; + } + /* round to nearest: if remainder > root, (root+1)^2 is closer */ + if (n > root) + root++; + return (u32)root; +} + +/*======================================================= + * FR_sqrt - fixed-radix square root. + * + * input : value at radix `radix`. Must be >= 0. + * radix : fractional bits of input AND result. + * return : sqrt(input) at radix `radix`, or FR_DOMAIN_ERROR if input < 0. + * + * Math: sqrt(input_fp / 2^r) at radix r is + * result_fp = sqrt(input_fp / 2^r) * 2^r = sqrt(input_fp * 2^r) + * so we compute isqrt(input_fp << radix) on a 64-bit accumulator. This + * works for any input that fits in s32 and any radix in [0, 30]. + * + * Precision: round-to-nearest sqrt. Worst-case absolute error is + * <= 0.5 LSB at the requested radix. + * Always non-negative for non-negative input. Result is monotone in + * input. + * + * Saturation: input < 0 returns FR_DOMAIN_ERROR (= INT32_MIN). Caller + * can test `result == FR_DOMAIN_ERROR` to detect domain errors. + * + * Side effects: none. Pure function. + */ +s32 FR_sqrt(s32 input, u16 radix) +{ + uint64_t n; + + if (input < 0) + return FR_DOMAIN_ERROR; + if (input == 0) + return 0; + + n = (uint64_t)(u32)input << radix; + return (s32)fr_isqrt64(n); +} + +/*======================================================= + * FR_hypot - sqrt(x*x + y*y) without intermediate overflow. + * + * x, y : values at radix `radix` + * radix : fractional bits of inputs AND result + * return : sqrt(x*x + y*y) at radix `radix`. + * + * Math: x*x + y*y is naturally at radix 2*radix; isqrt of a 2r-radix + * value yields an r-radix result, so no extra shifting is needed. The + * u64 accumulator can hold (INT32_MAX^2)*2 = ~2^63, so (x*x + y*y) never + * overflows for any s32 inputs. + * + * Precision: round-to-nearest. Worst-case absolute error <= 0.5 LSB + * at the requested radix. + * + * Side effects: none. Pure function. + */ +s32 FR_hypot(s32 x, s32 y, u16 radix) +{ + uint64_t xx = (uint64_t)((int64_t)x * (int64_t)x); + uint64_t yy = (uint64_t)((int64_t)y * (int64_t)y); + (void)radix; /* the 2*radix in xx+yy cancels with isqrt's halving */ + return (s32)fr_isqrt64(xx + yy); +} + +/*======================================================= + * FR_hypot_fast8 — 8-segment piecewise-linear magnitude approximation. + * + * Shift-only, no multiply, no 64-bit. Based on the piecewise-linear + * method described in US Patent 6,567,777 B1 (Chatterjee, expired). + * Peak error: ~0.10%. + */ +s32 FR_hypot_fast8(s32 x, s32 y) +{ + s32 hi, lo; + + /* absolute values (clamp INT32_MIN to INT32_MAX to avoid UB) */ + if (x < 0) x = (x == (s32)0x80000000) ? 0x7FFFFFFF : -x; + if (y < 0) y = (y == (s32)0x80000000) ? 0x7FFFFFFF : -y; + + /* hi = max(|x|,|y|), lo = min(|x|,|y|) */ + if (x > y) { hi = x; lo = y; } + else { hi = y; lo = x; } + + if (hi == 0) return 0; + + /* 8 piecewise-linear segments: dist ≈ a*hi + b*lo. + * Boundaries at β = 0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875. */ + if ((hi >> 1) < lo) { + /* β in (0.5, 1.0] */ + if (lo > hi - (hi >> 2)) { + /* β in (0.75, 1.0] */ + if (lo > hi - (hi >> 3)) /* β > 0.875 */ + /* a≈0.7305, b≈0.6836 */ + return hi - (hi >> 2) - (hi >> 6) - (hi >> 8) + + lo - (lo >> 2) - (lo >> 4) - (lo >> 8); + else /* β in (0.75, 0.875] */ + /* a≈0.7803, b≈0.6262 */ + return hi - (hi >> 2) + (hi >> 5) - (hi >> 10) + + (lo >> 1) + (lo >> 3) + (lo >> 10) + (lo >> 12); + } else { + /* β in (0.5, 0.75] */ + if (lo > hi - (hi >> 1) + (hi >> 3)) /* β > 0.625 */ + /* a≈0.8281, b≈0.5630 */ + return hi - (hi >> 2) + (hi >> 4) + (hi >> 6) + + (lo >> 1) + (lo >> 4) + (lo >> 11); + else /* β in (0.5, 0.625] */ + /* a≈0.8728, b≈0.4893 */ + return hi - (hi >> 3) - (hi >> 9) - (hi >> 12) + + (lo >> 1) - (lo >> 6) + (lo >> 8) + (lo >> 10); + } + } else { + /* β in [0, 0.5] */ + if ((hi >> 2) < lo) { + /* β in (0.25, 0.5] */ + if ((hi >> 1) - (hi >> 3) < lo) /* β > 0.375 */ + /* a≈0.9180, b≈0.3984 */ + return hi - (hi >> 4) - (hi >> 6) - (hi >> 8) + + (lo >> 1) - (lo >> 3) + (lo >> 5) - (lo >> 7); + else /* β in (0.25, 0.375] */ + /* a≈0.9551, b≈0.2988 */ + return hi - (hi >> 4) + (hi >> 6) + (hi >> 9) + + (lo >> 2) + (lo >> 4) - (lo >> 6) + (lo >> 9); + } else { + /* β in [0, 0.25] */ + if ((hi >> 3) < lo) /* β in (0.125, 0.25] */ + /* a≈0.9839, b≈0.1838 */ + return hi - (hi >> 6) - (hi >> 11) + + (lo >> 2) - (lo >> 4) - (lo >> 8) + (lo >> 12); + else /* β in [0, 0.125] */ + /* a≈0.9990, b≈0.0620 */ + return hi - (hi >> 10) + + (lo >> 4) - (lo >> 11); + } + } +} + +#ifndef FR_NO_WAVES +/*======================================================= + * Wave generators — synth-style fixed-shape waveforms. + * + * All wave functions take a u16 BAM phase in [0, 65535] (a full cycle) + * and return s16 in s0.15 format, clamped to [-32767, +32767] to match + * the trig amplitude convention used by fr_cos_bam / fr_sin_bam. + * + * Use FR_HZ2BAM_INC(hz, sample_rate) to compute a phase increment for + * a given output frequency, then accumulate it (mod 2^16) per sample. + * + * Side effects: pure functions (except fr_wave_noise which advances a + * caller-provided LFSR state pointer). + */ + +/* fr_wave_sqr - 50%-duty square wave. + * phase < pi (BAM<0x8000) → +full; phase >= pi → -full. + */ +s16 fr_wave_sqr(u16 phase) +{ + return (phase < 0x8000) ? (s16)32767 : (s16)-32767; +} + +/* fr_wave_pwm - variable-duty pulse. + * `duty` is the BAM threshold: phase < duty → high, else low. + * duty = 0 → always low + * duty = 0x8000 → 50% duty (same as fr_wave_sqr) + * duty = 0xffff → high almost everywhere (one BAM step low) + */ +s16 fr_wave_pwm(u16 phase, u16 duty) +{ + return (phase < duty) ? (s16)32767 : (s16)-32767; +} + +/* fr_wave_saw - rising sawtooth. + * Linear ramp from -32767 (just after phase=0) to +32767 (at phase=0xffff), + * passing through 0 at phase=0x8000. The single boundary case phase=0 + * (which would naturally produce -32768) is clamped to -32767 to keep the + * amplitude symmetric. + */ +s16 fr_wave_saw(u16 phase) +{ + s32 v = (s32)phase - (s32)0x8000; + if (v < -32767) v = -32767; + return (s16)v; +} + +/* fr_wave_tri - symmetric triangle. + * Four linear segments: + * Q1 [0, 0x4000) : rising 0 → +peak + * Q2 [0x4000, 0x8000): falling +peak → 0 + * Q3 [0x8000, 0xc000): falling 0 → -peak + * Q4 [0xc000, 0x10000): rising -peak → 0 + * Peaks are clamped to +/-32767 (the natural unclamped formula gives + * +/-32768 at the exact peak BAM). + */ +s16 fr_wave_tri(u16 phase) +{ + s32 t; + if (phase < 0x8000) + { + /* First half: 0 -> +peak -> 0 */ + if (phase < 0x4000) + t = (s32)phase << 1; /* 0 .. 0x7ffe */ + else + t = (s32)(0x8000 - phase) << 1; /* 0x8000 .. 2 */ + if (t > 32767) t = 32767; + return (s16)t; + } + else + { + /* Second half: 0 -> -peak -> 0 */ + if (phase < 0xc000) + t = (s32)(phase - 0x8000) << 1; /* 0 .. 0x7ffe */ + else + t = (s32)(0x10000 - phase) << 1;/* 0x8000 .. 2 */ + if (t > 32767) t = 32767; + return (s16)-t; + } +} + +/* fr_wave_tri_morph - variable-symmetry triangle. + * + * phase : u16 BAM + * break_point : u16 BAM where the wave reaches its positive peak. + * + * Going from 0 to +peak in [0, break_point), then from +peak back to 0 + * in [break_point, 0xffff]. The result is a triangle whose rising and + * falling slopes can differ. + * + * break_point = 0x8000 → symmetric triangle + * break_point = 0xffff → rising sawtooth (instant fall) + * break_point = 0x0001 → falling sawtooth (instant rise) + * break_point = 0 → degenerate; treated as 1 to avoid div-by-zero + * + * Note that this version returns values in [0, 32767] only (not bipolar). + * Caller can subtract 16384 and double if a bipolar version is desired. + * + * Costs: one 32-bit divide per sample. On Cortex-M3+ this is ~10-20 + * cycles. On 8051 / MSP430 this is much slower; pre-compute slopes if + * those targets matter to you. + */ +s16 fr_wave_tri_morph(u16 phase, u16 break_point) +{ + u32 t; + if (break_point == 0) + break_point = 1; + if (phase < break_point) + { + /* rising: 0 at phase=0, 32767 at phase=break_point */ + t = (u32)(((u32)phase * 32767UL) / (u32)break_point); + } + else + { + /* falling: 32767 at phase=break_point, 0 at phase=0xffff */ + u32 span = (u32)0xffff - (u32)break_point; + if (span == 0) + return 32767; + t = (u32)(((u32)((u32)0xffff - (u32)phase) * 32767UL) / span); + } + if (t > 32767) t = 32767; + return (s16)t; +} + +/* fr_wave_noise - LFSR-based pseudorandom noise. + * + * state : pointer to a u32 the caller maintains. Initial value must + * be non-zero (zero is a fixed point of the LFSR). A common + * seed is 0xACE1u or any other non-zero constant. + * + * Returns the next s16 sample in s0.15 (full ±32767 range, white-ish). + * Implementation: 32-bit Galois LFSR with the standard maximal-period + * tap polynomial 0xD0000001 (period 2^32 - 1 samples). + * + * Quality: this is "fast white noise" suitable for synth use. It is NOT + * cryptographically secure. For better statistical properties (FFT + * flatness etc.) layer a longer LFSR or use a separate PRNG. + */ +s16 fr_wave_noise(u32 *state) +{ + u32 lsb; + if (!state) + return 0; + lsb = *state & 1u; + *state >>= 1; + if (lsb) + *state ^= 0xD0000001u; + /* Take the top 16 bits and re-bias to s16 range, clamp to ±32767. */ + { + s32 v = (s32)((*state >> 16) & 0xffffu) - 32768; + if (v < -32767) v = -32767; + return (s16)v; + } +} + +/*======================================================= + * ADSR envelope generator + * + * Linear-segment Attack-Decay-Sustain-Release envelope. State is held + * in caller-allocated fr_adsr_t struct (no global state, no malloc). + * + * Lifecycle: + * 1. Caller allocates an fr_adsr_t (stack or static). + * 2. fr_adsr_init() once per patch with attack/decay/release durations + * in samples and a sustain level in s0.15. + * 3. fr_adsr_trigger() on note-on. Output rises 0 -> peak over `atk` + * samples, falls peak -> sustain over `dec` samples, then holds. + * 4. fr_adsr_release() on note-off. Output falls current -> 0 over a + * time controlled by the release rate (rate, not duration: the + * time depends on where in the envelope we are). + * 5. fr_adsr_step() once per audio sample to read the current value. + * + * Internal precision: levels are stored as s32 in s1.30 format so even + * very long envelopes (e.g. 48000-sample attack at 48 kHz = 1 second) + * have a non-zero per-sample increment. Output is converted to s0.15. + * + * Saturation: the envelope state machine is self-clamping; level cannot + * escape [0, 1<<30]. Output is in [0, 32767]. + */ + +#define FR_ADSR_PEAK_S130 ((s32)1 << 30) + +void fr_adsr_init(fr_adsr_t *env, + u32 attack_samples, + u32 decay_samples, + s16 sustain_level_s015, + u32 release_samples) +{ + if (!env) + return; + env->state = FR_ADSR_IDLE; + env->level = 0; + + /* sustain_level_s015 is s16 so its upper bound (32767) is already the + * type's max; only the lower bound needs an explicit clamp. */ + if (sustain_level_s015 < 0) + sustain_level_s015 = 0; + /* Convert s0.15 -> s1.30 by shifting left 15. */ + env->sustain = (s32)sustain_level_s015 << 15; + + env->attack_inc = (attack_samples > 0) + ? (s32)(FR_ADSR_PEAK_S130 / attack_samples) + : FR_ADSR_PEAK_S130; + env->decay_dec = (decay_samples > 0) + ? (s32)((FR_ADSR_PEAK_S130 - env->sustain) / (s32)decay_samples) + : (FR_ADSR_PEAK_S130 - env->sustain); + env->release_dec = (release_samples > 0) + ? (s32)(FR_ADSR_PEAK_S130 / release_samples) + : FR_ADSR_PEAK_S130; +} + +void fr_adsr_trigger(fr_adsr_t *env) +{ + if (!env) + return; + env->state = FR_ADSR_ATTACK; + env->level = 0; +} + +void fr_adsr_release(fr_adsr_t *env) +{ + if (!env) + return; + env->state = FR_ADSR_RELEASE; +} + +s16 fr_adsr_step(fr_adsr_t *env) +{ + if (!env) + return 0; + switch (env->state) + { + case FR_ADSR_ATTACK: + env->level += env->attack_inc; + if (env->level >= FR_ADSR_PEAK_S130) + { + env->level = FR_ADSR_PEAK_S130; + env->state = FR_ADSR_DECAY; + } + break; + case FR_ADSR_DECAY: + env->level -= env->decay_dec; + if (env->level <= env->sustain) + { + env->level = env->sustain; + env->state = FR_ADSR_SUSTAIN; + } + break; + case FR_ADSR_SUSTAIN: + env->level = env->sustain; + break; + case FR_ADSR_RELEASE: + env->level -= env->release_dec; + if (env->level <= 0) + { + env->level = 0; + env->state = FR_ADSR_IDLE; + } + break; + case FR_ADSR_IDLE: + default: + env->level = 0; + break; + } + /* s1.30 -> s0.15: shift right 15. Clamp for safety. */ + { + s32 out = env->level >> 15; + if (out < 0) out = 0; + if (out > 32767) out = 32767; + return (s16)out; + } +} +#endif /* FR_NO_WAVES */ diff --git a/docker/Dockerfile b/docker/Dockerfile index 4392617..2eee910 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,36 +1,41 @@ -ARG TARGETPLATFORM=linux/amd64 -FROM debian:bookworm-slim +FROM --platform=linux/amd64 ubuntu:22.04 + +ENV DEBIAN_FRONTEND=noninteractive # Cross-compilers for FR_Math size report. -# Bare toolchains only — no IDE, no runtime, no extras. -# Build with: docker build --platform linux/amd64 -t fr-math-sizes docker/ +# Mirrors the proven xelp crossbuild Dockerfile (github.com/deftio/xelp). +# Build with: docker build -t xelp-crossbuild:latest docker/ +# Use libc6-dev-i386 instead of gcc-multilib to avoid conflicts +# with cross-compiler packages on 22.04. RUN apt-get update && apt-get install -y --no-install-recommends \ - gcc g++ make bash binutils \ - gcc-arm-none-eabi libnewlib-arm-none-eabi \ - gcc-riscv64-unknown-elf \ - gcc-12-m68k-linux-gnu \ + build-essential \ + gcc clang tcc \ + libc6-dev-i386 \ + lib32gcc-s1 \ + gcc-aarch64-linux-gnu \ + gcc-arm-none-eabi \ + gcc-msp430 \ + gcc-avr \ gcc-m68hc1x \ + gcc-m68k-linux-gnu \ + gcc-powerpc-linux-gnu \ + gcc-riscv64-linux-gnu \ + gcc-riscv64-unknown-elf \ + gcc-xtensa-lx106 \ + picolibc-xtensa-lx106-elf \ + gcc-mipsel-linux-gnu \ sdcc \ + binutils \ wget ca-certificates xz-utils bzip2 \ && rm -rf /var/lib/apt/lists/* -# MSP430: TI's prebuilt msp430-elf-gcc (not in Debian repos). -RUN wget -q https://dr-download.ti.com/software-development/ide-configuration-compiler-or-debugger/MD-LlCjWuAbzH/9.3.1.2/msp430-gcc-9.3.1.11_linux64.tar.bz2 \ - -O /tmp/msp430-gcc.tar.bz2 \ - && mkdir -p /opt/msp430-gcc \ - && tar -xjf /tmp/msp430-gcc.tar.bz2 -C /opt/msp430-gcc --strip-components=1 \ - && rm /tmp/msp430-gcc.tar.bz2 - -ENV PATH="/opt/msp430-gcc/bin:${PATH}" - -# ESP32 (Xtensa): Espressif's prebuilt toolchain. -RUN wget -q https://github.com/espressif/crosstool-NG/releases/download/esp-13.2.0_20240530/xtensa-esp-elf-13.2.0_20240530-x86_64-linux-gnu.tar.xz \ - -O /tmp/xtensa-gcc.tar.xz \ - && mkdir -p /opt/xtensa-gcc \ - && tar -xJf /tmp/xtensa-gcc.tar.xz -C /opt/xtensa-gcc --strip-components=1 \ - && rm /tmp/xtensa-gcc.tar.xz - -ENV PATH="/opt/xtensa-gcc/bin:${PATH}" +# Espressif unified Xtensa toolchain (ESP32 LX6, ESP32-S2/S3 LX7). +# Not in Ubuntu repos; prebuilt from Espressif's crosstool-NG releases. +RUN wget -q https://github.com/espressif/crosstool-NG/releases/download/esp-15.2.0_20251204/xtensa-esp-elf-15.2.0_20251204-x86_64-linux-gnu.tar.xz \ + -O /tmp/xtensa-esp.tar.xz \ + && tar -xJf /tmp/xtensa-esp.tar.xz -C /opt \ + && for f in /opt/xtensa-esp-elf/bin/xtensa-esp*; do ln -sf "$f" /usr/local/bin/; done \ + && rm /tmp/xtensa-esp.tar.xz WORKDIR /src diff --git a/docker/build_sizes_compare.sh b/docker/build_sizes_compare.sh new file mode 100755 index 0000000..940de5f --- /dev/null +++ b/docker/build_sizes_compare.sh @@ -0,0 +1,174 @@ +#!/usr/bin/env bash +# +# build_sizes_compare.sh — cross-compile FR_math.c with and without FR_tan32.c +# for every supported target, and report the size delta. +# +# Run inside the Docker container: +# docker run --rm -v $(pwd):/src fr-math-sizes bash /src/docker/build_sizes_compare.sh + +set -euo pipefail + +SRC_OLD="/src/src/FR_math.c" +SRC_NEW="/src/src/FR_tan32.c" +INC="-I/src/src" +OUT="/src/build/size_compare" + +mkdir -p "${OUT}" + +# ── helpers ──────────────────────────────────────────────────────────── + +# get_text_size +# Compiles source(s) to .o files, sums .text sections. +get_text_size() { + local label="$1"; shift + local cc="$1"; shift + local sz_cmd="$1"; shift + local flags="$1"; shift + # remaining args are source files + + if ! command -v "${cc}" >/dev/null 2>&1; then + echo "n/a" + return + fi + + local total=0 + for src in "$@"; do + local bname + bname=$(basename "${src}" .c) + local obj="${OUT}/${label}_${bname}.o" + if ! ${cc} ${flags} ${INC} -std=c99 -Wall -Os -ffreestanding \ + -c "${src}" -o "${obj}" 2>/dev/null; then + echo "fail" + return + fi + local text + text=$(${sz_cmd} --format=berkeley "${obj}" 2>/dev/null | tail -1 | awk '{print $1}') + total=$((total + text)) + done + echo "${total}" +} + +# resolve_size_tool: given a compiler path, find the matching size binary +resolve_size_tool() { + local cc="$1" + local prefix="${cc%-gcc*}" + prefix="${prefix%-gcc-*}" + if [[ "${prefix}" != "${cc}" ]] && command -v "${prefix}-size" >/dev/null 2>&1; then + echo "${prefix}-size" + else + echo "size" + fi +} + +# ── target definitions ──────────────────────────────────────────────── + +declare -a T_NAMES T_CCS T_SZ T_FLAGS + +add() { + T_NAMES+=("$1") + T_CCS+=("$2") + T_SZ+=("$(resolve_size_tool "$2")") + T_FLAGS+=("$3") +} + +# ARM +add "RP2040 (Cortex-M0+)" arm-none-eabi-gcc "-mcpu=cortex-m0plus -mthumb" +add "STM32 (Cortex-M4)" arm-none-eabi-gcc "-mcpu=cortex-m4 -mthumb -mfloat-abi=soft" +add "Cortex-M0 (Thumb-1)" arm-none-eabi-gcc "-mcpu=cortex-m0 -mthumb" + +# RISC-V +add "RISC-V 32 (rv32im)" riscv64-unknown-elf-gcc "-march=rv32im -mabi=ilp32" + +# Xtensa (ESP32) +add "ESP32 (Xtensa)" xtensa-esp-elf-gcc "" + +# 68k +add "68k" m68k-linux-gnu-gcc-12 "" + +# x86 +add "x86-32" gcc "-m32" +add "x86-64" gcc "-m64" + +# MSP430 (16-bit, no stdint) +add "MSP430" msp430-elf-gcc "-mmcu=msp430f5529 -DFR_NO_STDINT" + +# 68HC11 (8-bit) +add "68HC11" m68hc11-gcc "-DFR_NO_STDINT" + +# ── compile ──────────────────────────────────────────────────────────── + +echo "" +echo "FR_Math cross-platform size comparison: OLD vs OLD+NEW tan32" +echo "Date: $(date -u '+%Y-%m-%d %H:%M UTC')" +echo "" + +declare -a R_OLD R_NEW + +for i in "${!T_NAMES[@]}"; do + label="${T_NAMES[$i]}" + cc="${T_CCS[$i]}" + sz="${T_SZ[$i]}" + flags="${T_FLAGS[$i]}" + + tag=$(echo "${label}" | tr ' ()/' '____') + + old=$(get_text_size "${tag}_old" "${cc}" "${sz}" "${flags}" "${SRC_OLD}") + new=$(get_text_size "${tag}_new" "${cc}" "${sz}" "${flags}" "${SRC_OLD}" "${SRC_NEW}") + + R_OLD+=("${old}") + R_NEW+=("${new}") + + echo " ${label}: old=${old} old+new=${new}" +done + +# ── output table ─────────────────────────────────────────────────────── + +echo "" +echo "## FR_Math size: Old vs Old + 32-bit LUT tan (\`-Os -ffreestanding\`)" +echo "" +printf "| %-26s | %10s | %10s | %10s | %6s |\n" "Target" "Old (text)" "w/ tan32" "Delta" "Delta%" +printf "| %-26s | %10s | %10s | %10s | %6s |\n" "--------------------------" "----------" "----------" "----------" "------" + +for i in "${!T_NAMES[@]}"; do + old="${R_OLD[$i]}" + new="${R_NEW[$i]}" + + if [[ "${old}" =~ ^[0-9]+$ ]] && [[ "${new}" =~ ^[0-9]+$ ]]; then + delta=$((new - old)) + pct=$(awk "BEGIN { printf \"%.1f\", 100.0*${delta}/${old} }") + printf "| %-26s | %8s B | %8s B | %+8d B | %5s%% |\n" \ + "${T_NAMES[$i]}" "${old}" "${new}" "${delta}" "${pct}" + else + printf "| %-26s | %10s | %10s | %10s | %6s |\n" \ + "${T_NAMES[$i]}" "${old}" "${new}" "—" "—" + fi +done + +echo "" +echo "Old = FR_math.c only (contains existing tan/atan)." +echo "w/ tan32 = FR_math.c + FR_tan32.c (adds new 32-bit LUT tan/atan alongside old)." +echo "Delta = additional bytes from FR_tan32.c (new functions + 129-entry u32 table)." +echo "" + +# ── per-function breakdown (x86-64) ─────────────────────────────────── + +echo "### Per-function breakdown (x86-64, GCC -Os)" +echo "" + +obj_old="${OUT}/x86_64_old_FR_math.o" +obj_new="${OUT}/x86_64_new_FR_tan32.o" + +if [[ -f "${obj_old}" ]] && [[ -f "${obj_new}" ]]; then + echo "**Old tan/atan in FR_math.o:**" + echo '```' + nm "${obj_old}" -n -S --size-sort -f sysv -t d 2>/dev/null | grep -iE "tan|atan" || true + echo '```' + echo "" + echo "**New in FR_tan32.o:**" + echo '```' + nm "${obj_new}" -n -S --size-sort -f sysv -t d 2>/dev/null | grep -E "FUNC" || true + echo '```' +fi + +echo "" +echo "Done." diff --git a/docker/size_detail.sh b/docker/size_detail.sh new file mode 100755 index 0000000..19677e5 --- /dev/null +++ b/docker/size_detail.sh @@ -0,0 +1,102 @@ +#!/usr/bin/env bash +set -euo pipefail + +INC="-I/src/src" +FLAGS="-std=c99 -Wall -Os -ffreestanding" +OUT=/tmp/sz +mkdir -p "${OUT}" + +do_platform() { + local label="$1" + local cc="$2" + local flags="$3" + + if ! command -v "${cc}" >/dev/null 2>&1; then + return + fi + + # Resolve size and nm tools + local sz_cmd="size" + local nm_cmd="nm" + local prefix="${cc%-gcc*}" + if [ "${prefix}" != "${cc}" ]; then + command -v "${prefix}-size" >/dev/null 2>&1 && sz_cmd="${prefix}-size" + command -v "${prefix}-nm" >/dev/null 2>&1 && nm_cmd="${prefix}-nm" + fi + + # Compile + ${cc} ${FLAGS} ${flags} ${INC} -c /src/src/FR_math.c -o "${OUT}/old.o" 2>/dev/null || return + ${cc} ${FLAGS} ${flags} ${INC} -c /src/src/FR_tan32.c -o "${OUT}/new.o" 2>/dev/null || return + + local old_text new_text + old_text=$(${sz_cmd} --format=berkeley "${OUT}/old.o" | tail -1 | awk '{print $1}') + new_text=$(${sz_cmd} --format=berkeley "${OUT}/new.o" | tail -1 | awk '{print $1}') + + # Sum old tan/atan function sizes from nm -S + local old_tan_total=0 + while IFS=' ' read -r addr size typ name; do + if [ -n "${size}" ]; then + dec_size=$((16#${size})) + old_tan_total=$((old_tan_total + dec_size)) + fi + done < <(${nm_cmd} -n -S --defined-only "${OUT}/old.o" 2>/dev/null \ + | grep -E " [tT] " | grep -iE "tan|atan" || true) + + local replace_delta=$((new_text - old_tan_total)) + local new_total=$((old_text - old_tan_total + new_text)) + + printf "| %-26s | %6s | %6s | %6s | %6s | %+6d |\n" \ + "${label}" "${old_text}" "${old_tan_total}" "${new_text}" "${new_total}" "${replace_delta}" + + rm -f "${OUT}/old.o" "${OUT}/new.o" +} + +echo "" +echo "## FR_Math: Old vs Replacement size (new tan32 replaces old tan/atan)" +echo "" +printf "| %-26s | %6s | %6s | %6s | %6s | %6s |\n" \ + "Target" "Old" "OldT/A" "New" "Repl" "Delta" +printf "| %-26s | %6s | %6s | %6s | %6s | %6s |\n" \ + "--------------------------" "------" "------" "------" "------" "------" + +do_platform "RP2040 (Cortex-M0+)" arm-none-eabi-gcc "-mcpu=cortex-m0plus -mthumb" +do_platform "STM32 (Cortex-M4)" arm-none-eabi-gcc "-mcpu=cortex-m4 -mthumb -mfloat-abi=soft" +do_platform "Cortex-M0 (Thumb-1)" arm-none-eabi-gcc "-mcpu=cortex-m0 -mthumb" +do_platform "RISC-V 32 (rv32im)" riscv64-unknown-elf-gcc "-march=rv32im -mabi=ilp32" +do_platform "ESP32 (Xtensa)" xtensa-esp-elf-gcc "" +do_platform "68k" m68k-linux-gnu-gcc-12 "" +do_platform "x86-32" gcc "-m32" +do_platform "x86-64" gcc "-m64" +do_platform "MSP430" msp430-elf-gcc "-mmcu=msp430f5529 -DFR_NO_STDINT" + +echo "" +echo "Old = FR_math.c total .text" +echo "OldT/A = old tan+atan functions within FR_math.o (would be removed)" +echo "New = FR_tan32.c total .text (replacement functions + 129-entry u32 table)" +echo "Repl = library size after replacement (Old - OldT/A + New)" +echo "Delta = New - OldT/A (net change from replacement)" + +# === x86-64 per-function detail === +echo "" +echo "### x86-64 per-function detail" +echo "" + +gcc ${FLAGS} -m64 ${INC} -c /src/src/FR_math.c -o "${OUT}/old.o" 2>/dev/null +gcc ${FLAGS} -m64 ${INC} -c /src/src/FR_tan32.c -o "${OUT}/new.o" 2>/dev/null + +echo "**Old tan/atan functions in FR_math.o:**" +echo '```' +nm -n -S --defined-only "${OUT}/old.o" | grep -E " [tT] " | grep -iE "tan|atan" | \ +while IFS=' ' read -r addr size typ name; do + printf " %-30s %d bytes\n" "${name}" "$((16#${size}))" +done +echo '```' + +echo "" +echo "**New functions in FR_tan32.o:**" +echo '```' +nm -n -S --defined-only "${OUT}/new.o" | grep -E " [tT] " | \ +while IFS=' ' read -r addr size typ name; do + printf " %-30s %d bytes\n" "${name}" "$((16#${size}))" +done +echo '```' diff --git a/docs/README.md b/docs/README.md index 0e1dd1a..636de4a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -16,7 +16,7 @@ into a single format. Tested on gcc, clang, MSVC, IAR, Keil, sdcc, AVR-gcc, MSP430-gcc, RISC-V toolchains, and Arduino. - Zero dependencies beyond ``. -- Parameterised radix: every function takes the binary point as an +- Parameterized radix: every function takes the binary point as an argument, so you choose how many fractional bits you need per call. - Deterministic, bounded error — every public symbol has a documented worst case in the [API reference](api-reference.md). @@ -33,7 +33,7 @@ or any tooling. If you want the browser version, look in | --- | --- | | [getting-started.md](getting-started.md) | Clone, build, run your first FR_Math program. | | [fixed-point-primer.md](fixed-point-primer.md) | Why fixed-point exists, sM.N notation, operations, how to pick a radix. | -| [api-reference.md](api-reference.md) | Every public symbol: signature, radix, precision, error behaviour. | +| [api-reference.md](api-reference.md) | Every public symbol: signature, radix, precision, error behavior. | | [examples.md](examples.md) | Runnable snippets: trig, log, waves, ADSR, 2D transforms. | | [building.md](building.md) | Makefile, scripts, test suite, coverage, cross-compilation. | | [releases.md](releases.md) | Release history with per-version highlights and breaking changes. | @@ -42,27 +42,33 @@ or any tooling. If you want the browser version, look in Errors below are measured at Q16.16 (s15.16). All functions accept any radix — Q16.16 is just the reference point for the table. See the -[TDD report](../build/test_tdd_report.md) for sweeps at radixes 8, 12, -16, and 24. Percent errors skip expected values near zero (|expected| < 0.01). +TDD report (run `make test-tdd` to generate `build/test_tdd_report.md`) +for sweeps at radixes 8, 12, 16, and 24. -| Function | Max err (%) | Avg err (%) | Note | +| Function | Max err (%)*| Avg err (%) | Note | |---|---:|---:|---| -| sin / cos | 0.7169 | 0.0100 | 65536-pt sweep + specials | -| tan | 0.7118 | 0.0162 | 65536-pt sweep (skip poles) | -| asin / acos | 0.7025 | 0.0105 | 65536-pt; sqrt approx near boundary | -| atan2 | 0.4953 | 0.0268 | 65536x5 radii; asin/acos+hypot_fast8 | -| atan | 0.2985 | 0.0159 | 20001-pt sweep [-10,10]; via FR_atan2 | -| sqrt | 0.0003 | 0.0000 | Round-to-nearest | -| log2 | 0.2479 | 0.0045 | 65-entry mantissa table | -| pow2 | 0.1373 | 0.0057 | 65-entry fraction table | -| ln, log10 | 0.0015 | 0.0004 | Via FR_MULK28 from log2 | -| exp | 0.0719 | 0.0051 | FR_MULK28 + FR_pow2 | -| exp_fast | 0.0719 | 0.0064 | Shift-only scaling | -| pow10 | 0.1163 | 0.0075 | FR_MULK28 + FR_pow2 | -| pow10_fast | 0.1163 | 0.0100 | Shift-only scaling | -| hypot (exact) | 0.0001 | 0.0000 | 64-bit intermediate | -| hypot_fast8 (8-seg) | 0.0977 | 0.0508 | Shift-only, no multiply | +| sin/cos (BAM) | 0.1526 | 0.0030 | very fast binary angle trig | +| sin/cos (deg) | 0.1526 | 0.0029 | degree input trig fns | +| sin/cos (rad) | 0.1828 | 0.0033 | radian (traditional) trig | +| tan (BAM) | 0.5823 | 0.0008 | binary angle tangent; ±maxint at poles | +| tan (deg) | 0.5311 | 0.0008 | degree input tangent; saturated at poles | +| tan (rad) | 0.0386 | 0.0001 | radian (traditional) tangent | +| asin / acos | 0.7771 | 0.0280 | reverse trig, radian output | +| atan2 | 0.2564 | 0.0237 | reverse tangent, always safe | +| atan | 0.2425 | 0.0155 | reverse tangent, accepts up to maxint | +| sqrt | 0.0000 | 0.0000 | Round-to-nearest | +| log2 | 0.0116 | 0.0016 | shift/add only for speed | +| pow2 | 0.0018 | 0.0004 | shift/add only for speed | +| ln, log10 | 0.0004 | 0.0000 | shift/add only for speed | +| exp | 0.0003 | 0.0000 | shift/add only for speed | +| exp_fast | 0.0009 | 0.0001 | Shift-only scaling | +| pow10 | 0.0005 | 0.0000 | shift/add only for speed | +| pow10_fast | 0.0022 | 0.0002 | Shift-only scaling | +| hypot (exact) | 0.0000 | 0.0000 | Uses 64-bit intermediate | +| hypot_fast8 (8-seg) | 0.0915 | 0.0320 | Shift-only, no multiply | + +*Relative error; reference clamped to 1% of full-scale output. ## What's in the box @@ -71,8 +77,8 @@ radix — Q16.16 is just the reference point for the table. See the | --- | --- | | Arithmetic | `FR_ADD`, `FR_SUB`, `FR_DIV`, `FR_DIV32`, `FR_MOD`, `FR_FixMuls`, `FR_FixMulSat`, `FR_CHRDX` | | Utility | `FR_MIN`, `FR_MAX`, `FR_CLAMP`, `FR_ABS`, `FR_SGN` | -| Trig (integer deg) | `FR_Sin`, `FR_Cos`, `FR_Tan`, `FR_SinI`, `FR_CosI`, `FR_TanI` | -| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_sin_deg`, `fr_cos_deg` | +| Trig (degree) | `fr_sin_deg`, `fr_cos_deg`, `fr_tan_deg`, `FR_SinI`, `FR_CosI`, `FR_TanI` | +| Trig (radian/BAM) | `fr_sin`, `fr_cos`, `fr_tan`, `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam` | | Inverse trig | `FR_atan`, `FR_atan2`, `FR_asin`, `FR_acos` | | Log / exp | `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`, `FR_EXP_FAST`, `FR_POW10_FAST`, `FR_MULK28` | | Roots | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` | @@ -118,7 +124,7 @@ pays off: - **8- and 16-bit MCUs** (AVR, MSP430, 8051, sdcc) where the FPU does not exist and even software float is too slow or too large. -- **Hot inner loops on any CPU** where a parameterised-radix integer +- **Hot inner loops on any CPU** where a parameterized-radix integer multiply is faster and more deterministic than a `float`. Think DSP taps, PID loops, coordinate transforms inside a scanline renderer. - **Bit-exact reproducibility** across compilers, architectures, and @@ -163,18 +169,23 @@ s32 two = I2FR(2, R); /* 2.0 → raw 131072 */ * * MixedCase FR_ names are functions — they contain loops, tables, or * multi-step algorithms where inlining would waste ROM: - * FR_Cos, FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ... + * FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ... * - * lowercase fr_ names are v2 functions (radian trig, wave generators, - * ADSR envelopes): - * fr_sin, fr_cos, fr_tan, fr_wave_tri, fr_adsr_step ... + * lowercase fr_ names are v2 functions (degree/radian/BAM trig, wave + * generators, ADSR envelopes): + * fr_sin_deg, fr_cos_deg, fr_tan_deg, fr_sin, fr_cos, fr_tan, + * fr_wave_tri, fr_adsr_step ... + * + * Legacy aliases: FR_Cos, FR_Sin, FR_Tan still work — they are + * macros that map to fr_cos_deg, fr_sin_deg, fr_tan_deg. New code + * should use the fr_ names directly. * * Some macros wrap functions: FR_EXP(x,r) scales x then calls * FR_pow2 — one-liner convenience, heavy lifting in the function. */ /* ---- Math functions ---- */ -s32 c45 = FR_Cos(45, 0); /* cos(45°) = 0.7071 */ +s32 c45 = fr_cos_deg(45, 0); /* cos(45°) = 0.7071 */ s32 s30 = fr_sin(FR_numstr("0.5236", R), R); /* sin(0.5236 rad) */ s32 root2 = FR_sqrt(two, R); /* sqrt(2) = 1.4142 */ s32 angle = FR_atan2(I2FR(1,R), I2FR(1,R), R); /* atan2(1,1) rad */ @@ -210,23 +221,23 @@ understand *how* the radix notation works first. | Multiply-free option | No | No | Yes (e.g. `FR_EXP_FAST`, `FR_hypot_fast8`) | | Wave generators | No | No | 6 shapes + ADSR | | Dependencies | None | ARM only | None | -| Code size (Cortex-M0, -Os) | 2.4 KB | ~40 KB+ | 4.2 KB | +| Code size (Cortex-M0, -Os) | 2.4 KB | ~40 KB+ | 3.4 KB lean / 5.7 KB full | Sizes measured with `arm-none-eabi-gcc -mcpu=cortex-m0 -mthumb -Os`. libfixmath covers trig/sqrt/exp in Q16.16 only; FR_Math includes log/ln/log10, wave generators, ADSR, print helpers, and variable radix. CMSIS-DSP estimate is for the math function subset only. -See [`docker/build_sizes.sh`](../docker/build_sizes.sh) for the build -script. +See [`scripts/crossbuild_sizes.sh`](../scripts/crossbuild_sizes.sh) for +the build script. ## History FR_Math has been in service since **2000**, originally built for graphics transforms on 16 MHz 68k Palm Pilots (it shipped inside Trumpetsoft's *Inkstorm*), then ported forward to ARM, x86, MIPS, -RISC-V, and various 8/16-bit embedded targets. v2.0.7 is the current -release with a full test suite, bit-exact numerical -specification, and CI on every push. +RISC-V, and various 8/16-bit embedded targets. The current release +has a full test suite, bit-exact numerical specification, and CI on +every push. ## License diff --git a/docs/api-reference.md b/docs/api-reference.md index 3f97f20..cf1cec5 100644 --- a/docs/api-reference.md +++ b/docs/api-reference.md @@ -1,7 +1,7 @@ # API Reference Every public symbol, grouped by topic. Each entry lists the radix -convention, the precision, and the error / saturation behaviour. All +convention, the precision, and the error / saturation behavior. All types are from `FR_defs.h`: `s8 s16 s32 s64` for signed and `u8 u16 u32 u64` for unsigned integers (these are aliases for the `` types). @@ -12,7 +12,7 @@ Most entries list **inputs**, **output**, **radix handling** and **precision** separately, because in a mixed-radix library those four things are what actually lets you plan an arithmetic pipeline without hidden -quantisation. If you are new to fixed-point, the +quantization. If you are new to fixed-point, the [Fixed-Point Primer](fixed-point-primer.md) explains the notation first; come back here once you're comfortable reading `s15.16` and `s0.15`. @@ -79,6 +79,8 @@ and in return get float-like ergonomics with integer-only codegen. | `FR_OVERFLOW_POS` | `0x7FFFFFFF` (`INT32_MAX`) | Saturating ops when the true result exceeds `+2^31`. | | `FR_OVERFLOW_NEG` | `0x80000000` (`INT32_MIN`) | Saturating ops when the true result is below `−2^31`. | | `FR_DOMAIN_ERROR` | `0x80000000` (`INT32_MIN`) | Functions with an invalid input, e.g. `FR_sqrt(-1)`, `FR_log2(0)`, `FR_asin(2.0)`. **Shares the bit pattern of `FR_OVERFLOW_NEG`**, so don't mix a `≤ FR_OVERFLOW_NEG` check with a domain check — test for the exact sentinel. | +| `FR_TRIG_MAXVAL` | `0x7FFFFFFF` (`INT32_MAX`) | Tangent saturation ceiling. Returned by `fr_tan_bam`, `fr_tan`, `fr_tan_deg`, and `FR_TanI` when the angle is near a pole (90° + k·180°). | +| `FR_TRIG_MINVAL` | `-FR_TRIG_MAXVAL` | Tangent saturation floor. Negative-side pole saturation. | ### Common numerical constants (`FR_math.h`) @@ -118,7 +120,7 @@ so call sites read as intent: | --- | --- | --- | --- | | `I2FR(i, r)` | `i`: integer; `r`: target radix in bits | `s32` at radix `r` | `(i) << (r)`. No bounds check. Use when you know `|i|` fits in `32 − r` signed bits. | | `FR2I(x, r)` | `x`: fixed-point at radix `r` | integer | `(x) >> (r)`. Truncates toward **−∞** (C's signed shift). `FR2I(-1, 4) == -1`, not 0. | -| `FR_INT(x, r)` | `x`: fixed-point at radix `r` | integer | Truncates toward **zero**. `FR_INT(-1, 4) == 0`. Useful when you want C's normal integer-cast behaviour. | +| `FR_INT(x, r)` | `x`: fixed-point at radix `r` | integer | Truncates toward **zero**. `FR_INT(-1, 4) == 0`. Useful when you want C's normal integer-cast behavior. | | `FR_NUM(i, f, d, r)` | `i`: integer part; `f`: decimal fraction digits; `d`: number of digits in `f`; `r`: target radix | `s32` at radix `r` | Build a fixed-point literal from decimal. `FR_NUM(12, 34, 2, 10)` is 12.34 at s.10. Rounds toward zero; for round-to-nearest, add half an LSB at the call site. | | `FR_numstr(s, r)` | `s`: null-terminated decimal string (e.g. `"3.14159"`); `r`: target radix | `s32` at radix `r` | Runtime string-to-fixed-point parser (inverse of `FR_printNumF`). Handles signs, leading whitespace, and leading-zero fractions like `"0.05"`. Up to 9 fractional digits. No malloc, no strtod, no libm. Returns 0 for NULL or empty input. | | `FR2D(x, r)` | `x`: fixed-point at radix `r` | `double` | Debug-only: `x / (double)(1 << r)`. Pulls in `libm` — compile it out of release builds. | @@ -157,13 +159,13 @@ so call sites read as intent: | `FR_MAX(a, b)` | Two values of the same type | The larger of the two | Evaluates each argument once. | | `FR_CLAMP(x, lo, hi)` | `x`: value; `lo`, `hi`: bounds | `x` clamped to `[lo, hi]` | Equivalent to `FR_MIN(FR_MAX(x, lo), hi)`. | | `FR_DIV(x, xr, y, yr)` | `x`: numerator at radix `xr`; `y`: denominator at radix `yr` | `s32` at radix `xr` | Pre-scales the numerator in a 64-bit intermediate and **rounds to nearest** (adds half the divisor before truncating, with correct sign handling). Worst-case error ≤ 0.5 LSB. Works correctly across the full Q16.16 range. | -| `FR_DIV_TRUNC(x, xr, y, yr)` | same as `FR_DIV` | `s32` at radix `xr` | `((s64)(x) << (yr)) / (s32)(y)`. Truncating division (rounds toward zero). This was the behaviour of `FR_DIV` in v2.0.0; use it when you need exact backward compatibility or when the truncation bias is acceptable. | +| `FR_DIV_TRUNC(x, xr, y, yr)` | same as `FR_DIV` | `s32` at radix `xr` | `((s64)(x) << (yr)) / (s32)(y)`. Truncating division (rounds toward zero). This was the behavior of `FR_DIV` in v2.0.0; use it when you need exact backward compatibility or when the truncation bias is acceptable. | | `FR_DIV32(x, xr, y, yr)` | same as `FR_DIV` | `s32` at radix `xr` | `((s32)(x) << (yr)) / (s32)(y)`. 32-bit-only truncating path — requires `|x| < 2^(31 − yr)` to avoid overflow in the intermediate shift. Use on tiny targets (PIC, AVR, 8051) where 64-bit ops pull in unwanted compiler runtime code. | | `FR_MOD(x, y)` | `x`, `y`: same radix | remainder at the same radix | `(x) % (y)`. Standard C remainder semantics. | ## Arithmetic -FR_Math splits arithmetic into three flavours. The +FR_Math splits arithmetic into three flavors. The **macros** (`FR_ADD`, `FR_SUB`) are mixed-radix, inline, and wrap on overflow. The **s.16 helper functions** (`FR_FixMuls`, @@ -291,7 +293,7 @@ bits = 16. Going wider would only add noise, not precision. "But what if I want to pass in any signed angle without worrying about conversion?" That is exactly what `FR_CosI(deg)`, -`FR_Cos(deg, radix)`, and `fr_cos(rad, radix)` are for. All three +`fr_cos_deg(deg, radix)`, and `fr_cos(rad, radix)` are for. All three take *signed* inputs and reduce them to BAM for you. The only place you actually see a `u16` is at the internal `fr_cos_bam` / `fr_sin_bam` boundary, which you only call by hand if you *want* @@ -346,7 +348,7 @@ Four shifts plus three adds — cheap on an 8051, AVR, or any hand-written DSP inner loop — and the answer has at most ±0.5 LSB of truncation error. The same discipline applies to the other direction: in `FR_DEG2BAM` the divide-by-360 is -a compile-time constant, so any optimising compiler folds it into a +a compile-time constant, so any optimizing compiler folds it into a multiply-by-reciprocal (or, on a weaker toolchain, a runtime call that you can inline yourself). @@ -396,6 +398,7 @@ represents exactly 1.0 in the s15.16 output format. | --- | --- | --- | | `fr_cos_bam` | `s32 fr_cos_bam(u16 bam)` | s15.16, range [−65536, +65536]. Exact at cardinal angles. | | `fr_sin_bam` | `s32 fr_sin_bam(u16 bam)` | s15.16. Defined as `fr_cos_bam(bam − FR_BAM_QUADRANT)`. | +| `fr_tan_bam` | `s32 fr_tan_bam(u16 bam)` | s15.16. Uses a 65-entry octant table for [0, 45°] and the reciprocal identity `tan(x) = 1/tan(90°−x)` for (45°, 90°). Saturates to `±FR_TRIG_MAXVAL` at the poles (90°, 270°). Returns exact 0 at 0° and 180°. No 64-bit intermediates; one 32-bit division only in the >45° path. | ### Radian-native @@ -405,35 +408,35 @@ represents exactly 1.0 in the s15.16 output format. | `fr_sin` | `s32 fr_sin(s32 rad, u16 radix)` | Same convention. | | `fr_tan` | `s32 fr_tan(s32 rad, u16 radix)` | Returns at **radix 16** (`FR_TRIG_OUT_PREC`). Computed as `(sin << 16) / cos`; saturates to `±INT32_MAX` (`FR_TRIG_MAXVAL`) near π/2 + kπ where cos → 0. | -### Integer-degree wrappers (legacy API) +### Degree wrappers (current and legacy) -The uppercase legacy API takes an angle in degrees. -`FR_SinI`, `FR_CosI` and `FR_TanI` -take plain integer degrees — the trailing *I* denotes -*integer*. The variants *without* the `I` -suffix (`FR_Sin`, `FR_Cos`, `FR_Tan`) -accept a `radix` argument and treat the degree value as -*fixed-point*, so you can pass fractional degrees like -42.375°. +The primary degree-based API uses lowercase `fr_` names. +These are functions (not macros) that take a degree value as +fixed-point at a caller-chosen radix: -| Symbol | Signature | Kind | +| Function | Signature | Notes | | --- | --- | --- | -| `FR_SinI` | `FR_SinI(deg)` → `s32` (s15.16) | Macro: `fr_sin_bam(FR_DEG2BAM(deg))`. Zero-cost inline. | -| `FR_CosI` | `FR_CosI(deg)` → `s32` (s15.16) | Macro: `fr_cos_bam(FR_DEG2BAM(deg))`. | -| `FR_TanI` | `s32 FR_TanI(s16 deg)` | Function. Returns at radix 16; saturates to `±INT32_MAX` near 90° / 270°. | -| `FR_Sin` | `s32 FR_Sin(s16 deg, u16 radix)` | `deg` is fixed-point at `radix`. Returns s15.16. | -| `FR_Cos` | `s32 FR_Cos(s16 deg, u16 radix)` | Same. | -| `FR_Tan` | `s32 FR_Tan(s16 deg, u16 radix)` | Returns at radix 16; saturates to `±INT32_MAX` near 90° / 270°. | +| `fr_sin_deg` | `s32 fr_sin_deg(s32 deg, u16 radix)` | `deg` is fixed-point degrees at `radix`. Returns s15.16. | +| `fr_cos_deg` | `s32 fr_cos_deg(s32 deg, u16 radix)` | Same. | +| `fr_tan_deg` | `s32 fr_tan_deg(s32 deg, u16 radix)` | Returns at radix 16; saturates to `±INT32_MAX` near 90° / 270°. | -### Degree wrappers on the BAM path +Pass `radix = 0` for plain integer degrees, or a higher radix +for fractional degrees (e.g. 42.375° at radix 4). -If you're using the lowercase family and want to skip the -radix entirely, two convenience macros cover pure integer degrees: +**Integer-degree macros** (`FR_SinI`, `FR_CosI`, `FR_TanI`) +take plain integer degrees -- the trailing *I* denotes +*integer*. These remain unchanged: -| Macro | Expansion | -| --- | --- | -| `fr_cos_deg(deg)` | `fr_cos_bam(FR_DEG2BAM(deg))` | -| `fr_sin_deg(deg)` | `fr_sin_bam(FR_DEG2BAM(deg))` | +| Symbol | Signature | Kind | +| --- | --- | --- | +| `FR_SinI` | `FR_SinI(deg)` -> `s32` (s15.16) | Macro: `fr_sin_bam(FR_DEG2BAM(deg))`. Zero-cost inline. | +| `FR_CosI` | `FR_CosI(deg)` -> `s32` (s15.16) | Macro: `fr_cos_bam(FR_DEG2BAM(deg))`. | +| `FR_TanI` | `s32 FR_TanI(s16 deg)` | Function. Returns at radix 16; saturates to `±INT32_MAX` near 90° / 270°. | + +**Legacy aliases.** The uppercase `FR_Sin`, `FR_Cos`, and +`FR_Tan` macros still work -- they map directly to +`fr_sin_deg`, `fr_cos_deg`, and `fr_tan_deg` respectively. +New code should use the `fr_` names. ## Inverse trigonometry diff --git a/docs/building.md b/docs/building.md index c9f5f21..db8d192 100644 --- a/docs/building.md +++ b/docs/building.md @@ -14,9 +14,10 @@ FR_Math has no dependencies beyond a C99 compiler and - Optional: `lcov` / `gcov` for coverage reports. -There is no Autotools, no CMake, no Ninja, no package-manager -integration. The library is small enough that the Makefile fits on -one screen. +There is no Autotools, no Ninja, no package-manager integration. +The primary build system is a single Makefile. A `CMakeLists.txt` +exists for ESP-IDF integration only — it is not a general-purpose +CMake build. ## Makefile targets @@ -90,28 +91,27 @@ See `release_management.md` for the full step-by-step reference. ## The test suite -Tests live under `tests/` and are split into six +Tests live under `tests/` and are split into seven binaries to keep compile times low: | Binary | What it checks | | --- | --- | -| `test_basic` | Radix conversions, `FR_ADD`, `FR_FixMuls`, rounding. | -| `test_trig` | Integer-degree trig (`FR_Sin` et al.). | -| `test_trig_radians` | Radian / BAM trig and the v2 `fr_sin` API. | -| `test_log_exp` | Log base 2 / ln / log10 and their inverses. | +| `fr_test` | Radix conversions, `FR_ADD`, `FR_FixMuls`, rounding (legacy harness). | +| `test_comprehensive` | Trig (degree, radian, BAM), log/exp, sqrt, hypot. | | `test_2d` | 2D transforms, determinants, inverses. | -| `test_full_coverage` | Dark-corner cases: overflow sentinels, edge radixes, round-trips. | -| `test_tdd` | Characterisation tests pinned to bit-exact reference values. | +| `test_overflow` | Overflow sentinels, saturation, edge radixes. | +| `test_full` | Full-coverage dark-corner cases and round-trips. | +| `test_2d_complete` | Extended 2D: matrix composition, inverse, point transforms. | +| `test_tdd` | Characterization tests pinned to bit-exact reference values. | -As of v2.0.0 the suite contains **42 tests** across -those binaries and covers **99%** of the library source. +The suite covers **99%** of the library source. Every public symbol is exercised at least once. ### Running a single binary ```bash -make build/test_basic -./build/test_basic +make test-comprehensive +./build/test_comprehensive # or all of them at once make test @@ -119,12 +119,12 @@ make test ### Running the TDD pins after a change -`test_tdd.cpp` is a characterisation suite. It records +`test_tdd.cpp` is a characterization suite. It records exact bit patterns for a sample of inputs and fails loudly if those -patterns drift. Any change that modifies the numerical behaviour of +patterns drift. Any change that modifies the numerical behavior of the library will break this suite — that's the point. -If you *intended* to change the numerical behaviour (e.g. +If you *intended* to change the numerical behavior (e.g. you improved a polynomial approximation), update the pinned values in `tests/test_tdd.cpp` and note the change in `release_notes.md` along with any updates to the @@ -155,44 +155,60 @@ you do *not* need `libm`. | Motorola 68k | `m68k-linux-gnu-gcc` | Docker. | | Motorola 68HC11 | `m68hc11-gcc` | Docker. | | PowerPC | `powerpc-linux-gnu-gcc` | Docker. | +| MIPS32 | `mipsel-linux-gnu-gcc` | Docker. | | Xtensa LX106 (ESP8266) | `xtensa-lx106-elf-gcc` | Docker. | +| Xtensa LX7 (ESP32-S3) | `xtensa-esp-elf-gcc` | Docker (Espressif toolchain). | | 8051 | `sdcc` | Manual. | ### Code size (.text section, compiled with `-Os`) -Sizes are for `FR_math.c` compiled with `-Os -ffreestanding`. -Core = compiled with `-DFR_CORE_ONLY` (math only, no print, no waves). +Sizes are for `FR_math.c` compiled with `-Os`. +Lean = `-DFR_LEAN -DFR_NO_PRINT` (radian trig, inv trig, log/exp, sqrt). +Core = `-DFR_CORE_ONLY` (+ degree trig, BAM tan, log10, hypot). +Full = all features (+ print, waves, ADSR). With `-ffunction-sections` and linker `--gc-sections`, only the functions your application references are linked, so real flash usage will be smaller. -| Target | Core | Full | -|--------|-----:|-----:| -| RP2040 (Cortex-M0+) | 2.6 KB | 4.2 KB | -| STM32 (Cortex-M4) | 2.6 KB | 4.2 KB | -| RISC-V 32 (rv32imac) | 3.0 KB | 4.7 KB | -| ESP32 (Xtensa) | 3.5 KB | 5.2 KB | -| 68k | 3.5 KB | 5.3 KB | -| x86-64 (GCC) | 3.5 KB | 5.7 KB | -| x86-32 | 4.5 KB | 6.8 KB | -| MSP430 (16-bit) | 5.9 KB | 8.9 KB | -| 68HC11 | 10.8 KB | 16.0 KB | -| AVR (ATmega328P) | 7.0 KB | 10.6 KB | +| Target | Lean | Core | Full | +|--------|-----:|-----:|-----:| +| Xtensa LX7 (ESP32-S3) | 2.9 KB | 4.2 KB | 5.3 KB | +| Cortex-M4 (STM32) | 3.3 KB | 4.4 KB | 5.5 KB | +| Cortex-M0 (RP2040) | 3.4 KB | 4.5 KB | 5.7 KB | +| ARM Thumb | 3.4 KB | 4.7 KB | 5.9 KB | +| RISC-V rv64 | 4.0 KB | 5.5 KB | 6.8 KB | +| RISC-V rv32 | 4.1 KB | 5.5 KB | 6.8 KB | +| Xtensa LX106 (ESP8266) | 4.2 KB | 5.8 KB | 7.3 KB | +| ARM32 | 4.3 KB | 5.8 KB | 7.7 KB | +| 68k | 4.4 KB | 6.2 KB | 7.8 KB | +| MIPS32 | 4.7 KB | 6.6 KB | 8.7 KB | +| x86-64 (GCC) | 4.6 KB | 6.1 KB | 8.0 KB | +| AArch64 (ARM64) | 4.8 KB | 6.6 KB | 8.7 KB | +| x86-32 | 5.3 KB | 7.2 KB | 9.2 KB | +| PowerPC | 5.8 KB | 8.0 KB | 10.4 KB | +| MSP430 (16-bit) | 7.8 KB | 10.7 KB | 12.8 KB | +| AVR (ATmega328P) | 9.2 KB | 12.8 KB | 15.4 KB | +| 68HC11 | 13.3 KB | 18.4 KB | 22.6 KB | ### Lean build options -Three compile-time `#define` guards let you strip optional subsystems +Compile-time `#define` guards let you strip optional subsystems for ROM-constrained targets. Define them before including `FR_math.h` (or pass `-D` on the compiler command line): | Define | What it removes | Typical savings | |---|---|---| -| `FR_CORE_ONLY` | Everything below (print + waves) | ~1.9 KB | +| `FR_LEAN` | Degree trig, BAM tan, angle converters, `FR_log10`, `FR_hypot`, waves + ADSR | ~3.7 KB | +| `FR_CORE_ONLY` | Print + waves (shorthand for both below) | ~1.9 KB | | `FR_NO_PRINT` | `FR_printNumF`, `FR_printNumD`, `FR_printNumH`, `FR_numstr` | ~1.3 KB | | `FR_NO_WAVES` | `fr_wave_*` (6 shapes), `fr_adsr_*` (ADSR envelope), `FR_HZ2BAM_INC` | ~0.6 KB | +`FR_LEAN` keeps only radian trig (sin, cos, tan), inverse trig, sqrt, +log2, ln, exp, pow2, and arithmetic — comparable to libfixmath's API at +4.7 KB text. `FR_LEAN` implies `FR_NO_WAVES`. + `FR_CORE_ONLY` is a convenience shorthand that defines both `FR_NO_PRINT` and `FR_NO_WAVES` in one step. @@ -211,7 +227,7 @@ To regenerate this table, run the Docker cross-build (requires the [xelp](https://github.com/deftio/xelp) Docker image): ```bash -scripts/crossbuild-docker.sh +scripts/crossbuild_sizes.sh ``` ### Example: RISC-V diff --git a/docs/examples.md b/docs/examples.md index b7d6145..d54c34e 100644 --- a/docs/examples.md +++ b/docs/examples.md @@ -1,7 +1,7 @@ # Examples Short, runnable snippets for the most common FR_Math tasks. Each -example compiles cleanly against the v2.0.0 library with: +example compiles cleanly against the library with: ```bash cc -Isrc example.c src/FR_math.c -o example @@ -57,15 +57,15 @@ int main(void) ## 2. Trig — integer degrees vs radian vs BAM FR_Math supports three angle conventions and this example hits -all three: integer degrees through the legacy -`FR_Sin` / `FR_Cos` API, the radian-native +all three: fixed-point degrees through the +`fr_sin_deg` / `fr_cos_deg` API, the radian-native `fr_sin` / `fr_cos` (radian at a chosen input radix), and BAM-native `fr_sin_bam` / `fr_cos_bam`. All three paths feed the same 129-entry quadrant cosine table under the hood and should produce nearly identical results. -*Caveats:* the `radix` parameter on `FR_Sin(deg, radix)` is +*Caveats:* the `radix` parameter on `fr_sin_deg(deg, radix)` is the radix of the *degree input*, not the output. All sin/cos functions return **s15.16** — that is, `s32` at radix 16, where 1.0 = 65536 (`FR_TRIG_ONE`). The values compared below @@ -200,19 +200,16 @@ int main(void) ## 5. Arctangent and atan2 The inverse-trig functions in FR_Math return angles in -**degrees**, not radians — the output fits in -an `s16` and you can feed it straight back into -`FR_SinI` / `FR_CosI` without any -conversion. This example exercises both `FR_atan` -(single-argument ratio) and `FR_atan2` (full-circle, -two-argument). - -*Caveats:* `FR_atan2` takes only two -arguments (`y`, `x`) and has no radix -parameter — it returns degrees in [−180, 180] as -`s16`. The `radix` argument on -`FR_atan` is the radix of the *input* ratio, -not of the output. +**radians** at a caller-chosen output radix. This +example exercises both `FR_atan` (single-argument +ratio) and `FR_atan2` (full-circle, two-argument). + +*Caveats:* all inverse-trig functions take an +`out_radix` parameter that sets the radix of the +*output*. `FR_atan2(y, x, out_radix)` returns +radians in [−π, π] as `s32` at the chosen +radix. `FR_atan(input, radix, out_radix)` has +separate radixes for input and output. ```c #include @@ -222,18 +219,19 @@ int main(void) { const u16 r = 14; - /* atan(1) = 45 degrees */ - s16 a = FR_atan(I2FR(1, r), r); - printf("atan(1) = %d degrees (expect 45)\n", a); + /* atan(1) = pi/4 radians ≈ 0.7854 */ + s32 a = FR_atan(I2FR(1, r), r, r); + printf("atan(1) = %d (radix %d, expect ~%d)\n", + (int)a, r, (int)(12868)); /* pi/4 at r14 */ /* Full-circle atan2 */ - s16 q2 = FR_atan2(I2FR( 1, r), I2FR(-1, r)); /* 135 deg */ - s16 q3 = FR_atan2(I2FR(-1, r), I2FR(-1, r)); /* -135 deg */ - printf("atan2( 1,-1) = %d\n", q2); - printf("atan2(-1,-1) = %d\n", q3); + s32 q2 = FR_atan2(I2FR( 1, r), I2FR(-1, r), r); /* 3*pi/4 */ + s32 q3 = FR_atan2(I2FR(-1, r), I2FR(-1, r), r); /* -3*pi/4 */ + printf("atan2( 1,-1) = %d (expect ~%d)\n", (int)q2, (int)(38603)); + printf("atan2(-1,-1) = %d (expect ~%d)\n", (int)q3, (int)(-38603)); /* asin with out-of-domain input */ - s16 bad = FR_asin(I2FR(2, r), r); + s32 bad = FR_asin(I2FR(2, r), r, r); if (bad == FR_DOMAIN_ERROR) printf("asin(2) rejected, good.\n"); return 0; @@ -425,7 +423,7 @@ The `XFormPtI16` fast path takes `s16` coordinates in and writes `s16` out. It's a tiny bit lossier than the `s32` form, but it sidesteps all the fixed-point conversion on the hot path — useful inside -the inner loop of a scanline rasteriser where you already know +the inner loop of a scanline rasterizer where you already know your coordinates fit in 16 bits. *Caveats:* the output is narrowed to `s16`, @@ -511,7 +509,7 @@ int main(void) FR_printNumF(buf_putc, val, 16, 0, 8); printf(" 16 16 0x%08x %s\n", (unsigned)val, buf); /* Expected: "3.14158630" — good through 5 digits, then - * quantisation noise appears. This is the sweet spot for + * quantization noise appears. This is the sweet spot for * most embedded work: 16 bits of fraction fits in an s32 * with 15 bits of integer range (±32767). */ } @@ -562,9 +560,38 @@ at radix 8 the value is `0x324` — only 10 significant bits — so the decimal rendering can only faithfully reproduce about two fractional digits. At radix 24 the value is `0x03243F6A` — 26 significant bits — and seven decimal digits survive. The -eighth digit (`5` vs `4`) shows the quantisation floor: `2^−24 ≈ +eighth digit (`5` vs `4`) shows the quantization floor: `2^−24 ≈ 6 × 10^−8`, so the last digit is always uncertain. +## Desktop example programs + +In addition to the inline snippets above, the `examples/` directory +contains four self-contained desktop programs. Each has its own +`Makefile` and `README.md`; build artifacts stay within the example's +directory. + +| Directory | What it does | +|---|---| +| [`examples/fixed-point-basics/`](../examples/fixed-point-basics/) | Educational walkthrough of radix interpretation, `I2FR`/`FR2I` round-trips, `FR_NUM` constant construction, aligned add/sub, multiply precision, division, saturation, and `FR_printNumF` formatted output. | +| [`examples/log-exp-curves/`](../examples/log-exp-curves/) | Sweeps `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10`, and `FR_sqrt` against IEEE double reference values, printing per-point and summary error tables. | +| [`examples/waveform-synth/`](../examples/waveform-synth/) | Generates square, triangle, sawtooth, PWM, sine, and noise waveforms plus an ADSR envelope and amplitude-modulated combination. Default mode renders ASCII art; `--csv` mode outputs machine-readable CSV. | +| [`examples/trig-accuracy/`](../examples/trig-accuracy/) | Head-to-head comparison of FR_Math (`FR_SinI`/`FR_CosI`/`FR_TanI`) vs libfixmath (`fix16_sin`/`fix16_cos`/`fix16_tan`) vs IEEE double over 0–360 degrees. Requires libfixmath source. | + +Build all from the repo root: + +```bash +make examples # builds all desktop examples +make run-examples # builds and runs 1-3, plus 4 if libfixmath present +``` + +Or build any single example from its directory: + +```bash +cd examples/waveform-synth +make run # ASCII art output +make run-csv # CSV output +``` + ## See also - [API Reference](api-reference.md) — full diff --git a/docs/fixed-point-primer.md b/docs/fixed-point-primer.md index 4c127ad..489a127 100644 --- a/docs/fixed-point-primer.md +++ b/docs/fixed-point-primer.md @@ -266,14 +266,14 @@ you want to think of an FR_Math value as a "number with a radix", think of the radix as a *type annotation that lives in your source code*, not a runtime field. -## Quantisation and loss of precision +## Quantization and loss of precision Fixing the radix also fixes the smallest representable fractional step. At radix *N*, that step is `2^−N` — nothing finer survives the round-trip into the integer. Any real value smaller than the step rounds to zero; any real value landing between two adjacent steps rounds to one of them. The difference between the ideal -value and its stored form is called **quantisation error**, and it +value and its stored form is called **quantization error**, and it is the main price paid for doing fractional math in integer registers. @@ -295,7 +295,7 @@ radix 16 and the picture changes: error = 0.00000153 (< 0.002 %) ``` -This behaviour isn't a bug — it is the same compromise IEEE-754 +This behavior isn't a bug — it is the same compromise IEEE-754 floating point makes with its mantissa. The difference is that a float hides the trade-off behind a variable exponent, while fixed-point puts it on a ledger that the programmer chooses up @@ -307,7 +307,7 @@ half the smallest step the application cares about. Any coarser and small signals vanish; any finer and integer headroom is being spent for no benefit. -A second consequence worth recording: quantisation error +A second consequence worth recording: quantization error *accumulates*. Summing a million low-radix values sums the errors too. Signal-processing pipelines with long feedback paths are the main reason to carry accumulators at a wider radix than the @@ -375,7 +375,7 @@ FR_Math ships this operation as callback `f`, which makes it usable on targets without stdio — a UART write, an LCD glyph pusher, a ring-buffer append. The `pad` parameter sets a minimum field width and `prec` sets the number of -fractional digits. Rounding behaviour matches the hand-rolled +fractional digits. Rounding behavior matches the hand-rolled version: excess fractional digits are truncated, and negative values are handled without the two's-complement trap described above. @@ -384,7 +384,7 @@ above. Once you've chosen a radix, the everyday operations behave almost like integer math — with one or two twists per -operation that you just have to internalise. Let's walk +operation that you just have to internalize. Let's walk through them. ### Addition and subtraction @@ -527,7 +527,7 @@ Three things to watch for: it explicitly before the divide. - **Rounding toward zero.** C's integer division truncates toward zero for both signs, so `−7 / 2 == −3` (not `−4`). Fixed-point - division inherits that behaviour. Round-to-nearest can be + division inherits that behavior. Round-to-nearest can be layered on top by adding `b / 2` (for a positive numerator) or `−b / 2` (for a negative numerator) to the pre-scaled numerator before the divide. @@ -557,7 +557,7 @@ for you: - Going to a *smaller* radix — the low bits are dropped. Precision is lost; headroom grows. This is a good place to add `± (1 << (from_r - to_r - 1))` - before the shift if you want round-to-nearest behaviour. + before the shift if you want round-to-nearest behavior. The value is conserved as closely as the destination radix can represent it. Nothing more, nothing less. @@ -620,7 +620,7 @@ and store the result back into a 32-bit register without thinking about it, you will eventually pass a pair of inputs whose product doesn't fit, and plain C will hand you wrap-around garbage with no warning. A signed 32-bit multiply that overflows is not a -runtime error in C — it's undefined behaviour that +runtime error in C — it's undefined behavior that happens to look like data most of the time. FR_Math defends against this in three layers, and it's @@ -711,12 +711,12 @@ you actually need 15 integer bits on that particular signal. ## A worked example: one-pole IIR low-pass filter The sections up to this point have introduced the pieces -individually: scaling, notation, quantisation, arithmetic, +individually: scaling, notation, quantization, arithmetic, overflow, and radix choice. A small end-to-end example is the fastest way to see how those pieces fit together on a real pipeline. The filter walked through below is a single-pole infinite-impulse-response (IIR) low-pass — about the simplest -entry in the DSP catalogue, but realistic enough to exercise +entry in the DSP catalog, but realistic enough to exercise nearly every decision the primer has covered so far. In floating point, the filter is one line of arithmetic: @@ -753,7 +753,7 @@ be picked: `x`, so it shares the same ±32767 output range. But because it accumulates small updates on every sample, it will drift and lose precision unless carried at a higher radix than the raw - input. This is the quantisation-error accumulation noted + input. This is the quantization-error accumulation noted earlier in the primer, showing up in practice. ### Step 2: pick the radixes @@ -841,7 +841,7 @@ feeds both versions a few thousand samples — a mix of sine tones, step inputs, and silence is enough to exercise the relevant paths — and reports the worst-case delta. For a radix-15 one-pole IIR the expected worst-case difference is on the order of a few LSB, -comparable to the inherent quantisation of the 16-bit output +comparable to the inherent quantization of the 16-bit output format and not audible in normal listening. Anything substantially larger indicates a radix choice that is too tight, a rounding mode that is drifting, or a missing int64 promotion on the @@ -862,8 +862,8 @@ generation of each symbol: | Prefix | What it is | Example | | --- | --- | --- | | `FR_XXX()` | `UPPERCASE` macro — inline, zero call overhead. | `FR_ADD`, `FR_ABS`, `FR2I` | -| `FR_Xxx()` | Mixed-case C function — the classic v1 API. Integer-degree trig and related. | `FR_Sin`, `FR_log2`, `FR_sqrt` | -| `fr_xxx()` | Lowercase C function — v2 additions (radian / BAM trig, wave generators, ADSR). | `fr_sin`, `fr_wave_tri`, `fr_adsr_step` | +| `FR_Xxx()` | Mixed-case C function — the classic v1 API. Log, sqrt, inverse trig, and related. | `FR_log2`, `FR_sqrt`, `FR_atan2` | +| `fr_xxx()` | Lowercase C function — v2 API (degree/radian/BAM trig, wave generators, ADSR). `fr_sin_deg`, `fr_cos_deg`, `fr_tan_deg` are the current degree-based trig names. `FR_Sin`/`FR_Cos`/`FR_Tan` remain as legacy aliases. | `fr_sin_deg`, `fr_sin`, `fr_wave_tri`, `fr_adsr_step` | | `s8, s16, s32` | Signed integer typedefs (aliases for `int8_t`, `int16_t`, `int32_t`). | — | | `u8, u16, u32` | Unsigned integer typedefs. | — | @@ -917,13 +917,13 @@ Angles deserve their own section because FR_Math gives you angle into?** Because the `u16` wraparound *is* the angular modulus — that's the whole feature. Adding two `u16` BAM values automatically gives you the right answer modulo a full revolution, -with zero quantisation error at the boundary and no `% 65536` in +with zero quantization error at the boundary and no `% 65536` in sight. If BAM were `s32`, every read of the table would have to explicitly mask off the top bits (and handle negative values) before the quadrant extraction (`bam >> 14`) made any sense. You would have traded one free operation for two slow ones on every -sample, just to get back the same behaviour. So instead, the public -trig entry points (`FR_CosI`, `FR_Cos`, `fr_cos`, and friends) +sample, just to get back the same behavior. So instead, the public +trig entry points (`FR_CosI`, `fr_cos_deg`, `fr_cos`, and friends) *all* take signed angles — in degrees, fixed-radix degrees, or radians — and only the internal `fr_cos_bam` / `fr_sin_bam` primitives see the `u16`. In practice you will never construct a diff --git a/docs/getting-started.md b/docs/getting-started.md index 54b7f17..2fefb6d 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -13,8 +13,8 @@ manager integration and no install step. Either: - Copy `src/FR_math.c`, `src/FR_math.h`, `src/FR_defs.h` (and optionally - `src/FR_math_2D.cpp`, `src/FR_math_2D.h`, - and `src/FR_trig_table.h`) into the target project, **or** + `src/FR_math_2D.cpp`, `src/FR_math_2D.h`) + into the target project, **or** - Add FR_Math as a git submodule and point the build system at `src/`. @@ -29,7 +29,7 @@ cd fr_math `build.sh` wipes `build/`, rebuilds the library, examples, and tests, and runs the full test suite. On success -the output shows 42 tests passing across six test binaries. +the output shows all tests passing (99% line coverage). ## A first program @@ -274,8 +274,9 @@ make test # build + run every test suite make coverage # coverage report (requires gcov) ``` -As of v2.0.1, FR_Math ships with 42 passing tests and 99% line -coverage across the library sources. +Run `make test` for a full pass. With `make coverage`, line coverage of +the library sources is about **99%**. See [Building & Testing](building.md) +for targets, cross-compilation, and CI. ## Next steps @@ -284,7 +285,7 @@ coverage across the library sources. conventions work. - **[API Reference](api-reference.md)** — per-symbol inputs, outputs, precision, and saturation - behaviour. + behavior. - **[Examples](examples.md)** — runnable snippets for common tasks. - **[Building & Testing](building.md)** diff --git a/docs/releases.md b/docs/releases.md index 277b811..734eb1b 100644 --- a/docs/releases.md +++ b/docs/releases.md @@ -4,16 +4,27 @@ Release highlights. For the full per-symbol change log, see [release_notes.md](https://github.com/deftio/fr_math/blob/master/release_notes.md) in the repo. +## v2.0.8 — 2026 + +Tangent accuracy rewrite and trig rounding fix. + +- **BAM-native tangent**: new `fr_tan_bam(u16 bam)` with 65-entry octant table (130 bytes). No 64-bit math. `FR_TanI`, `FR_Tan`, `fr_tan` are now thin wrappers. +- **Round-to-nearest fix**: radian/degree trig wrappers now round instead of truncating when converting to BAM. Peak error drops from ~1.03% to 0.16% on the radian path, matching BAM-native accuracy. +- **Conversion macro trimming**: `FR_DEG2BAM` and `FR_RAD2BAM` reduced to ~18-21 bits (from ~28 bits). Verified: no measurable accuracy impact. +- **`FR_TRIG_MINVAL` fixed**: now `-FR_TRIG_MAXVAL` (was `-FR_TRIG_MASK`) + +--- + ## v2.0.7 — 2026 README restructure, accuracy table cleanup, expanded cross-compile support. - **`FR_CORE_ONLY` convenience define** — single `#define` strips both print helpers and wave generators - **Accuracy table cleanup** — removed LSB column (percent error is the user-facing metric) -- **New cross-compile targets** — RP2040 (Cortex-M0+), STM32 (Cortex-M4), 68HC11 added to Docker build -- **Two-column size table** — Core (`-DFR_CORE_ONLY`) vs Full for every target -- **`scripts/update_sizes.sh`** — auto-patches size tables from `build/sizes.csv` -- README reordered: accuracy table first, then function list, then size table +- **New cross-compile targets** — RP2040 (Cortex-M0+), STM32 (Cortex-M4), 68HC11, MIPS32 added to Docker build +- **Three-column size table** — Lean / Core / Full for every target, sorted 8-bit → 64-bit +- **Consolidated `scripts/crossbuild_sizes.sh`** — single script runs Docker, builds, writes CSV + markdown, patches docs (replaces three separate scripts) +- README reordered and cleaned up: accuracy table first, badges as standard markdown, concise build flavor descriptions --- @@ -124,7 +135,7 @@ with v2.0.0 except where noted. - `FR_DIV(x, xr, y, yr)` — fixed-point division with 64-bit pre-scaling. Now **rounds to nearest** (≤ 0.5 LSB error) instead of truncating. `FR_DIV_TRUNC` preserves the old - truncating behaviour for backward compatibility. `FR_DIV32` is + truncating behavior for backward compatibility. `FR_DIV32` is the 32-bit-only truncating path. - `FR_MOD(x, xr, y, yr)` — fixed-point modulus. @@ -149,7 +160,7 @@ with v2.0.0 except where noted. | FR_atan signature | `(input, radix)` → s16 degrees | `(input, radix, out_radix)` → s32 radians | | FR_atan2 signature | `(y, x)` → s16 degrees | `(y, x, out_radix)` → s32 radians | | FR_BAM2RAD | off by 1024× (bug) | correct | -| FR_DIV rounding | truncates toward zero | rounds to nearest (use `FR_DIV_TRUNC` for old behaviour) | +| FR_DIV rounding | truncates toward zero | rounds to nearest (use `FR_DIV_TRUNC` for old behavior) | --- @@ -187,7 +198,7 @@ mandatory. dropped. - `FR_atan`, `FR_Tan`, `FR_TanI`: wiring and overflow fixes. -- `FR_printNumD/F/H`: fixed undefined behaviour on +- `FR_printNumD/F/H`: fixed undefined behavior on `INT_MIN` and a broken fraction extraction in the v1 code. - `FR_DEG2RAD` / `FR_RAD2DEG`: macro bodies @@ -205,7 +216,7 @@ mandatory. `FR_BAM2DEG`, `FR_RAD2BAM`, `FR_BAM2RAD`. BAM (16 bits per full circle) is the natural integer representation for phase accumulators and - gives zero quantisation at the wraparound. + gives zero quantization at the wraparound. - **Square root and hypot**: `FR_sqrt` uses a digit-by-digit integer isqrt on `int64_t`; `FR_hypot` computes `sqrt(x² + y²)` @@ -259,10 +270,9 @@ mandatory. ### Test suite -v2 ships with **42 tests** across six test binaries -and a characterisation suite (`test_tdd.cpp`) that pins -numerical behaviour to bit-exact reference values. Overall line -coverage is **99%** on the library sources. +v2 ships with a full test suite covering **99%** of library +source lines, plus a characterization suite (`test_tdd.cpp`) +that pins numerical behavior to bit-exact reference values. ## v1.0.3 — 2025 @@ -298,6 +308,5 @@ FR_Math has been in continuous service since **2000**, when it was written to run 2D graphics transforms on 16 MHz 68k Palm Pilots for Trumpetsoft's *Inkstorm*. It has since been ported to ARM, x86, MIPS, RISC-V, and a menagerie of 8- and -16-bit embedded targets. v2.0.7 is the current release with a -full test suite, a bit-exact numerical specification, and CI on -every push. +16-bit embedded targets. The current release has a full test +suite, a bit-exact numerical specification, and CI on every push. diff --git a/examples/README.md b/examples/README.md index c06fc5c..09f2e7f 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,5 +1,8 @@ # FR_Math Examples +This directory contains examples on a few platforms for seeing how FR_math works. For embedded platforms use of the built-in printxxx functions can help provide readable output in various radix formats. + + ## Arduino Examples Each example is a self-contained `.ino` sketch that prints results to @@ -13,17 +16,31 @@ or PlatformIO, then open any example from **File > Examples > FR_Math**. | [wave-generators](wave-generators/wave-generators.ino) | sqr, tri, saw, PWM, noise, ADSR envelope | | [arduino_smoke](arduino_smoke/arduino_smoke.ino) | Compile-only smoke test — exercises every function group | -## Desktop / POSIX Example +## Desktop / POSIX Examples + +Each desktop example is self-contained with its own `Makefile` and `README.md`. +Build artifacts stay within the example's directory. | Example | What it shows | |---|---| -| [posix-example](posix-example/FR_Math_Example1.cpp) | Comprehensive demo of all library features including 2D transforms (requires ``, ``) | +| [posix-example](posix-example/FR_Math_Example1.cpp) | Comprehensive demo of all library features including 2D transforms | +| [fixed-point-basics](fixed-point-basics/) | Educational walkthrough: radix, conversions, add/sub/mul/div, saturation, formatted output | +| [log-exp-curves](log-exp-curves/) | Sweep log2/ln/log10, pow2/exp/pow10, sqrt with error tables vs IEEE double | +| [waveform-synth](waveform-synth/) | Wave generators + ADSR envelope with ASCII art and CSV output modes | +| [trig-accuracy](trig-accuracy/) | FR_Math vs libfixmath trig accuracy comparison (requires libfixmath source) | + +Build all from the repo root: + +```bash +make examples # builds all desktop examples +make run-examples # builds and runs examples 1-3, plus 4 if libfixmath present +``` -Build the POSIX example with: +Or build any single example from its directory: ```bash -make examples # produces build/fr_example -./build/fr_example +cd examples/fixed-point-basics +make run ``` ## Using FR_Math in Arduino diff --git a/examples/arduino_smoke/README.md b/examples/arduino_smoke/README.md new file mode 100644 index 0000000..304683e --- /dev/null +++ b/examples/arduino_smoke/README.md @@ -0,0 +1,36 @@ +# Arduino Smoke Test + +Compile-only smoke test that exercises every major FR_Math function group +to verify that `FR_math.c` links cleanly on AVR (avr-gcc) and ARM targets. +No specific hardware required beyond a board that compiles. + +## What it tests + +- Conversions and arithmetic: `I2FR`, `FR_ADD`, `FR_DIV`, `FR_DIV_TRUNC`, `FR_MOD` +- Integer-degree trig: `FR_CosI`, `FR_SinI` +- BAM and radian trig: `fr_cos_bam`, `fr_sin_bam`, `fr_cos`, `fr_sin` +- Inverse trig: `FR_atan2`, `FR_acos` +- Log / exp: `FR_log2`, `FR_ln`, `FR_log10`, `FR_pow2`, `FR_EXP`, `FR_POW10` +- Shift-only variants: `FR_EXP_FAST`, `FR_POW10_FAST` +- Roots: `FR_sqrt`, `FR_hypot`, `FR_hypot_fast`, `FR_hypot_fast8` +- Wave generators: `fr_wave_sqr`, `fr_wave_pwm`, `fr_wave_tri`, `fr_wave_saw`, `fr_wave_tri_morph`, `fr_wave_noise` +- ADSR envelope: `fr_adsr_init`, `fr_adsr_trigger`, `fr_adsr_step`, `fr_adsr_release` +- String parsing: `FR_numstr` + +## Building + +**Arduino CLI** (no upload needed — compile-only test): + +```bash +arduino-cli compile --fqbn arduino:avr:uno examples/arduino_smoke +``` + +If it compiles without errors, all function groups link correctly. + +## Expected serial output + +If uploaded and run: + +``` +FR_Math smoke test: all functions linked OK +``` diff --git a/examples/basic-math/README.md b/examples/basic-math/README.md new file mode 100644 index 0000000..5d1cc20 --- /dev/null +++ b/examples/basic-math/README.md @@ -0,0 +1,63 @@ +# Basic Math — Arduino Example + +Demonstrates fixed-point arithmetic fundamentals on Arduino: +conversions, add, subtract, multiply, divide, and utility macros. + +## What it demonstrates + +| Operation | Functions / macros | +|-----------|-------------------| +| Integer to fixed-point | `I2FR`, `FR_INT` | +| Addition / subtraction | `FR_ADD`, `FR_SUB` | +| Multiplication | `FR_FixMuls` (round-to-nearest) | +| Division | `FR_DIV` (64-bit, rounded) | +| Constant construction | `FR_NUM` (build 3.14159 from parts) | +| String parsing | `FR_numstr` | +| Utility macros | `FR_ABS`, `FR_MIN`, `FR_MAX`, `FR_CLAMP` | +| Radix change | `FR_CHRDX` | + +## Hardware + +Any Arduino board with a serial port. Output at 9600 baud. + +## Building + +**Arduino IDE**: Open `basic-math.ino` from **File > Examples > FR_Math > basic-math**. + +**Arduino CLI**: + +```bash +arduino-cli compile --fqbn arduino:avr:uno examples/basic-math +arduino-cli upload --fqbn arduino:avr:uno -p /dev/ttyACM0 examples/basic-math +arduino-cli monitor -p /dev/ttyACM0 --config baudrate=9600 +``` + +**PlatformIO**: + +```bash +pio run -e uno +pio run -e uno -t upload +pio device monitor -b 9600 +``` + +## Expected serial output + +``` +=== FR_Math Basic Arithmetic === + +a = 100 +b = 37 +a + b = 137 +a - b = 63 +a * b = 3700 +a / b = 2 +pi ~ 3 +parsed "-12.75" = -12 +abs(-5) = 5 +min(3,7) = 3 +max(3,7) = 7 +clamp(9,0,5) = 5 +42 @ radix 12 -> radix 8: 42 + +Done. +``` diff --git a/examples/fixed-point-basics/Makefile b/examples/fixed-point-basics/Makefile new file mode 100644 index 0000000..e0eaf83 --- /dev/null +++ b/examples/fixed-point-basics/Makefile @@ -0,0 +1,29 @@ +# fixed-point-basics — self-contained build +# All artifacts stay in this directory. +# +# Usage: +# make Build the example +# make run Build and run +# make clean Remove build artifacts + +CC ?= gcc +CXX ?= g++ +SRC_DIR = ../../src + +CXXFLAGS = -I$(SRC_DIR) -Wall -Wextra -Wshadow -Os +LDFLAGS = -lm + +TARGET = fixed_point_basics + +.PHONY: all run clean + +all: $(TARGET) + +$(TARGET): fixed_point_basics.cpp $(SRC_DIR)/FR_math.c + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ + +run: $(TARGET) + ./$(TARGET) + +clean: + rm -f $(TARGET) *.o *.gcda *.gcno diff --git a/examples/fixed-point-basics/README.md b/examples/fixed-point-basics/README.md new file mode 100644 index 0000000..eedd81a --- /dev/null +++ b/examples/fixed-point-basics/README.md @@ -0,0 +1,63 @@ +# Fixed-Point Basics + +An educational walkthrough of fixed-point fundamentals using the FR_Math +library. This example is self-contained and builds independently. + +## What it demonstrates + +| Section | Topic | Key functions / macros | +|---------|-------|----------------------| +| A | Same integer interpreted at radixes 0-15 | `FR2D` | +| B | Integer-to-fixed and back round-trip | `I2FR`, `FR2I` | +| C | Constructing fractional constants (pi) | `FR_NUM` | +| D | Add / subtract with radix alignment | `FR_ADD`, `FR_CHRDX` | +| E | Multiply: precision doubling and truncation | `FR_CHRDX` | +| F | Division (64-bit and 32-bit variants) | `FR_DIV`, `FR_DIV32` | +| G | Saturation vs overflow | `FR_FixAddSat`, `FR_FixMulSat` | +| H | Formatted output via putchar callback | `FR_printNumF` | + +## Building + +```bash +make # compiles fixed_point_basics +make run # compiles and runs +make clean # removes build artifacts +``` + +Or compile manually: + +```bash +g++ -I../../src -Wall -Os fixed_point_basics.cpp ../../src/FR_math.c -lm -o fixed_point_basics +``` + +## Expected output + +The program prints a series of labeled sections. Section A shows how +the same raw integer (1234) maps to different floating-point values as +the radix changes. Subsequent sections print tables comparing FR_Math +operations against expected values, demonstrating precision, alignment +rules, and saturation behavior. + +``` +FR_Math — Fixed-Point Basics (v...) + +======================================== + A. Same integer at radixes 0-15 +======================================== + + Raw integer value: 1234 + ... + +======================================== + H. FR_printNumF — formatted fixed-point printing +======================================== + ... + +--- end --- +``` + +## Dependencies + +- A C++ compiler (g++ or clang++) +- FR_Math source (`../../src/FR_math.c`, `../../src/FR_math.h`, `../../src/FR_defs.h`) +- Standard C math library (`-lm`, used only for `FR2D` debug macro) diff --git a/examples/fixed-point-basics/fixed_point_basics b/examples/fixed-point-basics/fixed_point_basics new file mode 100755 index 0000000..87578da Binary files /dev/null and b/examples/fixed-point-basics/fixed_point_basics differ diff --git a/examples/fixed-point-basics/fixed_point_basics.cpp b/examples/fixed-point-basics/fixed_point_basics.cpp new file mode 100644 index 0000000..2499ed6 --- /dev/null +++ b/examples/fixed-point-basics/fixed_point_basics.cpp @@ -0,0 +1,236 @@ +/** + * fixed_point_basics.cpp — Educational walkthrough of fixed-point fundamentals + * + * Demonstrates: radix interpretation, I2FR/FR2I conversions, FR_NUM construction, + * add/sub alignment, multiply precision, division, saturation, and FR_printNumF. + * + * Build: make ex_basics + * Run: ./build/ex_basics + * + * Copyright (C) 2001-2026 M. A. Chatterjee — zlib license (see FR_math.h) + */ + +#include +#include + +#include "FR_defs.h" +#include "FR_math.h" + +/* putchar callback for FR_printNumF */ +static int put_char(char c) { return putchar(c); } + +/* ------------------------------------------------------------------ */ +static void section(const char *label) +{ + printf("\n========================================\n"); + printf(" %s\n", label); + printf("========================================\n\n"); +} + +/* ================================================================== */ +int main() +{ + printf("FR_Math — Fixed-Point Basics (v%s)\n", FR_MATH_VERSION); + + /* -------------------------------------------------------------- */ + /* A. Same integer at different radixes */ + /* -------------------------------------------------------------- */ + section("A. Same integer at radixes 0-15"); + + s32 val = 1234; + printf(" Raw integer value: %d\n\n", (int)val); + printf(" radix float-equiv precision (1/2^r)\n"); + printf(" ----- ----------- -------------------\n"); + for (int r = 0; r <= 15; r++) { + printf(" %5d %11.5f 1/%-5d = %.8f\n", + r, FR2D(val, r), 1 << r, FR2D(1, r)); + } + + /* -------------------------------------------------------------- */ + /* B. I2FR / FR2I round-trip conversions */ + /* -------------------------------------------------------------- */ + section("B. I2FR / FR2I round-trip conversions"); + + int test_ints[] = {0, 1, -1, 42, -100, 32767}; + int n_tests = (int)(sizeof(test_ints) / sizeof(test_ints[0])); + int rdx = 12; + printf(" Radix = %d\n\n", rdx); + printf(" int -> I2FR(int,%d) -> FR2I(fr,%d) -> float-equiv\n", rdx, rdx); + printf(" ---- ----------- ----------- -----------\n"); + for (int i = 0; i < n_tests; i++) { + s32 fr = I2FR(test_ints[i], rdx); + s32 back = FR2I(fr, rdx); + printf(" %6d %11d %11d %11.4f\n", + test_ints[i], (int)fr, (int)back, FR2D(fr, rdx)); + } + + /* -------------------------------------------------------------- */ + /* C. FR_NUM — construct pi (3.14159) at radix 12 */ + /* -------------------------------------------------------------- */ + section("C. FR_NUM — construct fractional constants"); + + rdx = 12; + s32 pi_fr = FR_NUM(3, 14159, 5, rdx); + s32 neg_half = FR_NUM(0, 5, 1, rdx); + neg_half = -neg_half; /* -0.5 */ + printf(" pi at radix %d: raw = %d, float = %.6f (ref %.6f)\n", + rdx, (int)pi_fr, FR2D(pi_fr, rdx), 3.14159); + printf(" -0.5 at radix %d: raw = %d, float = %.6f\n", + rdx, (int)neg_half, FR2D(neg_half, rdx)); + + rdx = 16; + pi_fr = FR_NUM(3, 14159, 5, rdx); + printf(" pi at radix %d: raw = %d, float = %.6f (ref %.6f)\n", + rdx, (int)pi_fr, FR2D(pi_fr, rdx), 3.14159); + + /* -------------------------------------------------------------- */ + /* D. Add/sub with aligned radix */ + /* -------------------------------------------------------------- */ + section("D. Addition / subtraction with radix alignment"); + + { + int ra = 10, rb = 10; + s32 a = FR_NUM(2, 5, 1, ra); /* 2.5 at radix 10 */ + s32 b = FR_NUM(1, 25, 2, rb); /* 1.25 at radix 10 */ + s32 sum = a + b; /* same radix — direct add */ + printf(" Same radix (%d):\n", ra); + printf(" 2.5 + 1.25 = %.4f (raw: %d + %d = %d)\n", + FR2D(sum, ra), (int)a, (int)b, (int)sum); + + /* Different radixes — must align first */ + int ra2 = 8, rb2 = 12; + s32 a2 = FR_NUM(2, 5, 1, ra2); /* 2.5 at radix 8 */ + s32 b2 = FR_NUM(1, 25, 2, rb2); /* 1.25 at radix 12 */ + + /* Wrong: adding directly without alignment */ + s32 wrong = a2 + b2; + printf("\n Different radixes (a=r%d, b=r%d):\n", ra2, rb2); + printf(" WRONG (no align): raw %d + %d = %d (%.4f ?""?)\n", + (int)a2, (int)b2, (int)wrong, FR2D(wrong, ra2)); + + /* Right: use FR_ADD which aligns b to a's radix */ + s32 a2_copy = a2; + FR_ADD(a2_copy, ra2, b2, rb2); + printf(" RIGHT (FR_ADD): result raw = %d (%.4f)\n", + (int)a2_copy, FR2D(a2_copy, ra2)); + } + + /* -------------------------------------------------------------- */ + /* E. Multiply — precision doubling and truncation */ + /* -------------------------------------------------------------- */ + section("E. Multiply — precision doubling, FR_CHRDX"); + + { + int r = 12; + s32 a = FR_NUM(3, 5, 1, r); /* 3.5 at radix 12 */ + s32 b = FR_NUM(2, 25, 2, r); /* 2.25 at radix 12 */ + s32 product = a * b; /* result is at radix 24 (12+12) */ + printf(" 3.5 * 2.25 at radix %d:\n", r); + printf(" a = %d (%.4f), b = %d (%.4f)\n", + (int)a, FR2D(a, r), (int)b, FR2D(b, r)); + printf(" product raw = %d at radix %d (%.6f)\n", + (int)product, 2 * r, FR2D(product, 2 * r)); + printf(" ref = %.6f\n", 3.5 * 2.25); + + /* Truncate back to original radix */ + s32 truncated = FR_CHRDX(product, 2 * r, r); + printf(" FR_CHRDX to radix %d: %d (%.4f)\n", + r, (int)truncated, FR2D(truncated, r)); + } + + /* -------------------------------------------------------------- */ + /* F. Division */ + /* -------------------------------------------------------------- */ + section("F. Division (FR_DIV, FR_DIV32)"); + + { + int r = 16; + s32 a = I2FR(7, r); + s32 b = I2FR(3, r); + s32 q64 = FR_DIV(a, r, b, r); /* 64-bit intermediate, rounded */ + printf(" 7 / 3 at radix %d:\n", r); + printf(" FR_DIV (64-bit, rounded): %d (%.6f)\n", + (int)q64, FR2D(q64, r)); + printf(" ref = %.6f\n", 7.0 / 3.0); + + /* FR_DIV32 works when x << yr fits in s32. + * At radix 8, x=7 → x<<8 = 1792, well within range. */ + int r8 = 8; + a = I2FR(7, r8); + b = I2FR(3, r8); + s32 q32 = FR_DIV32(a, r8, b, r8); + printf("\n 7 / 3 at radix %d (FR_DIV32, 32-bit only):\n", r8); + printf(" FR_DIV32: %d (%.6f) ref = %.6f\n", + (int)q32, FR2D(q32, r8), 7.0 / 3.0); + + a = I2FR(22, r); + b = I2FR(7, r); + q64 = FR_DIV(a, r, b, r); + printf("\n 22 / 7 at radix %d:\n", r); + printf(" FR_DIV: %d (%.6f) ref = %.6f\n", + (int)q64, FR2D(q64, r), 22.0 / 7.0); + } + + /* -------------------------------------------------------------- */ + /* G. Saturation — overflow vs saturate */ + /* -------------------------------------------------------------- */ + section("G. Saturation (FR_FixAddSat, FR_FixMulSat)"); + + { + s32 big = 0x70000000; + s32 also_big = 0x20000000; + s32 raw_add = big + also_big; /* overflows! */ + s32 sat_add = FR_FixAddSat(big, also_big); + + printf(" Addition overflow:\n"); + printf(" 0x%08X + 0x%08X\n", (unsigned)big, (unsigned)also_big); + printf(" raw add: 0x%08X (%d) — OVERFLOW!\n", + (unsigned)raw_add, (int)raw_add); + printf(" FR_FixAddSat: 0x%08X (%d) — saturated\n", + (unsigned)sat_add, (int)sat_add); + + s32 x = 50000; + s32 y = 50000; + s32 raw_mul = x * y; /* overflows at 32-bit */ + s32 sat_mul = FR_FixMulSat(x, y); + + printf("\n Multiply overflow (as s15.16):\n"); + printf(" %d * %d\n", (int)x, (int)y); + printf(" raw mul: %d — OVERFLOW!\n", (int)raw_mul); + printf(" FR_FixMulSat: %d (%.4f as s15.16) — saturated\n", + (int)sat_mul, FR2D(sat_mul, 16)); + } + + /* -------------------------------------------------------------- */ + /* H. FR_printNumF formatted output */ + /* -------------------------------------------------------------- */ + section("H. FR_printNumF — formatted fixed-point printing"); + + { + int r = 13; + s32 z = (s32)(123.456 * (1 << r)); + s32 zn = -z; + + printf(" z = %d (raw), float = %.4f\n", (int)z, FR2D(z, r)); + printf(" zn = %d (raw), float = %.4f\n\n", (int)zn, FR2D(zn, r)); + + printf(" FR_printNumF(z, r=%d, pad=6, prec=3): ", r); + FR_printNumF(put_char, z, r, 6, 3); + printf("\n"); + + printf(" FR_printNumF(zn, r=%d, pad=6, prec=3): ", r); + FR_printNumF(put_char, zn, r, 6, 3); + printf("\n"); + + printf(" FR_printNumF(z, r=%d, pad=8, prec=5): ", r); + FR_printNumF(put_char, z, r, 8, 5); + printf("\n"); + + printf(" FR_printNumF(zn, r=%d, pad=8, prec=5): ", r); + FR_printNumF(put_char, zn, r, 8, 5); + printf("\n"); + } + + printf("\n--- end ---\n"); + return 0; +} diff --git a/examples/log-exp-curves/Makefile b/examples/log-exp-curves/Makefile new file mode 100644 index 0000000..6279a60 --- /dev/null +++ b/examples/log-exp-curves/Makefile @@ -0,0 +1,29 @@ +# log-exp-curves — self-contained build +# All artifacts stay in this directory. +# +# Usage: +# make Build the example +# make run Build and run +# make clean Remove build artifacts + +CC ?= gcc +CXX ?= g++ +SRC_DIR = ../../src + +CXXFLAGS = -I$(SRC_DIR) -Wall -Wextra -Wshadow -Os +LDFLAGS = -lm + +TARGET = log_exp_curves + +.PHONY: all run clean + +all: $(TARGET) + +$(TARGET): log_exp_curves.cpp $(SRC_DIR)/FR_math.c + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ + +run: $(TARGET) + ./$(TARGET) + +clean: + rm -f $(TARGET) *.o *.gcda *.gcno diff --git a/examples/log-exp-curves/README.md b/examples/log-exp-curves/README.md new file mode 100644 index 0000000..133cd7c --- /dev/null +++ b/examples/log-exp-curves/README.md @@ -0,0 +1,63 @@ +# Log / Exp / Sqrt Curves + +Sweeps the FR_Math logarithmic, exponential, and square-root functions, +printing comparison tables against IEEE 754 double-precision reference +values with per-point and summary error statistics. + +## What it demonstrates + +| Table | Functions tested | Input range | +|-------|-----------------|-------------| +| Log | `FR_log2`, `FR_ln`, `FR_log10` | 0.25 to 10.0 (9 selected points) | +| Exp | `FR_pow2`, `FR_EXP`, `FR_POW10` | -3.0 to 3.0 in 0.5 steps | +| Sqrt | `FR_sqrt` | 0.25 to 100.0 (15 selected points) | + +Each table shows: `input | FR_result | reference | error%` + +A summary line per function gives max |error%| and avg |error%|. + +## Building + +```bash +make # compiles log_exp_curves +make run # compiles and runs +make clean # removes build artifacts +``` + +Or compile manually: + +```bash +g++ -I../../src -Wall -Os log_exp_curves.cpp ../../src/FR_math.c -lm -o log_exp_curves +``` + +## Expected output + +``` +FR_Math — Log / Exp / Sqrt Curves (v..., radix=16) + +======================================== + Log functions (input > 0) +======================================== + + input | FR_log2 ref_log2 err% | ... + ... + FR_log2 max |err|: ... avg |err|: ... + +======================================== + Exp functions (input -3.0 to 3.0) +======================================== + ... + +======================================== + Square root (FR_sqrt) +======================================== + ... + +--- end --- +``` + +## Dependencies + +- A C++ compiler (g++ or clang++) +- FR_Math source (`../../src/FR_math.c`, `../../src/FR_math.h`, `../../src/FR_defs.h`) +- Standard C math library (`-lm`, for double-precision reference values) diff --git a/examples/log-exp-curves/log_exp_curves b/examples/log-exp-curves/log_exp_curves new file mode 100755 index 0000000..92619fb Binary files /dev/null and b/examples/log-exp-curves/log_exp_curves differ diff --git a/examples/log-exp-curves/log_exp_curves.cpp b/examples/log-exp-curves/log_exp_curves.cpp new file mode 100644 index 0000000..1689b8e --- /dev/null +++ b/examples/log-exp-curves/log_exp_curves.cpp @@ -0,0 +1,192 @@ +/** + * log_exp_curves.cpp — Sweep log/exp/sqrt functions with comparison tables + * + * For each function group, prints a table of FR_math result vs IEEE double + * reference with error%, then a summary line (max_err%, avg_err%). + * + * Build: make ex_logexp + * Run: ./build/ex_logexp + * + * Copyright (C) 2001-2026 M. A. Chatterjee — zlib license (see FR_math.h) + */ + +#include +#include + +#include "FR_defs.h" +#include "FR_math.h" + +#define R 16 /* working radix for all tests */ + +static double pct_err(double measured, double ref) +{ + if (fabs(ref) < 1e-12) + return fabs(measured) * 100.0; + return ((measured - ref) / ref) * 100.0; +} + +/* Accumulator for max/avg error tracking */ +typedef struct { + double max_abs_pct; + double sum_abs_pct; + int n; +} err_stats_t; + +static void err_reset(err_stats_t *e) { e->max_abs_pct = 0; e->sum_abs_pct = 0; e->n = 0; } + +static void err_add(err_stats_t *e, double pct) +{ + double a = fabs(pct); + if (a > e->max_abs_pct) e->max_abs_pct = a; + e->sum_abs_pct += a; + e->n++; +} + +static void err_print(err_stats_t *e, const char *label) +{ + printf(" %s max |err|: %.4f%% avg |err|: %.4f%% (%d points)\n", + label, e->max_abs_pct, + e->n > 0 ? e->sum_abs_pct / e->n : 0.0, e->n); +} + +/* ------------------------------------------------------------------ */ +static void section(const char *label) +{ + printf("\n========================================\n"); + printf(" %s\n", label); + printf("========================================\n\n"); +} + +/* ================================================================== */ +int main() +{ + printf("FR_Math — Log / Exp / Sqrt Curves (v%s, radix=%d)\n", FR_MATH_VERSION, R); + + /* -------------------------------------------------------------- */ + /* Log functions: FR_log2, FR_ln, FR_log10 */ + /* -------------------------------------------------------------- */ + section("Log functions (input > 0)"); + + double log_inputs[] = {0.25, 0.5, 1.0, 1.5, 2.0, 3.0, 5.0, 7.0, 10.0}; + int n_log = (int)(sizeof(log_inputs) / sizeof(log_inputs[0])); + + err_stats_t e_log2, e_ln, e_log10; + err_reset(&e_log2); err_reset(&e_ln); err_reset(&e_log10); + + printf(" %-8s | %-12s %-12s %-8s | %-12s %-12s %-8s | %-12s %-12s %-8s\n", + "input", "FR_log2", "ref_log2", "err%", + "FR_ln", "ref_ln", "err%", + "FR_log10", "ref_log10", "err%"); + printf(" %-8s-+-%-12s-%-12s-%-8s-+-%-12s-%-12s-%-8s-+-%-12s-%-12s-%-8s\n", + "--------", "------------", "------------", "--------", + "------------", "------------", "--------", + "------------", "------------", "--------"); + + for (int i = 0; i < n_log; i++) { + double x = log_inputs[i]; + s32 xfr = (s32)(x * (1 << R)); + + double fr_l2 = FR2D(FR_log2(xfr, R, R), R); + double fr_ln = FR2D(FR_ln(xfr, R, R), R); + double fr_l10 = FR2D(FR_log10(xfr, R, R), R); + + double ref_l2 = log2(x); + double ref_ln = log(x); + double ref_l10 = log10(x); + + double e2 = pct_err(fr_l2, ref_l2); + double en = pct_err(fr_ln, ref_ln); + double e10 = pct_err(fr_l10, ref_l10); + + err_add(&e_log2, e2); + err_add(&e_ln, en); + err_add(&e_log10, e10); + + printf(" %-8.4f | %12.6f %12.6f %7.3f%% | %12.6f %12.6f %7.3f%% | %12.6f %12.6f %7.3f%%\n", + x, fr_l2, ref_l2, e2, fr_ln, ref_ln, en, fr_l10, ref_l10, e10); + } + + printf("\n"); + err_print(&e_log2, "FR_log2 "); + err_print(&e_ln, "FR_ln "); + err_print(&e_log10, "FR_log10"); + + /* -------------------------------------------------------------- */ + /* Exp functions: FR_pow2, FR_EXP, FR_POW10 */ + /* -------------------------------------------------------------- */ + section("Exp functions (input -3.0 to 3.0)"); + + err_stats_t e_pow2, e_exp, e_pow10; + err_reset(&e_pow2); err_reset(&e_exp); err_reset(&e_pow10); + + printf(" %-8s | %-12s %-12s %-8s | %-12s %-12s %-8s | %-12s %-12s %-8s\n", + "input", "FR_pow2", "ref_pow2", "err%", + "FR_EXP", "ref_exp", "err%", + "FR_POW10", "ref_pow10", "err%"); + printf(" %-8s-+-%-12s-%-12s-%-8s-+-%-12s-%-12s-%-8s-+-%-12s-%-12s-%-8s\n", + "--------", "------------", "------------", "--------", + "------------", "------------", "--------", + "------------", "------------", "--------"); + + for (double x = -3.0; x <= 3.001; x += 0.5) { + s32 xfr = (s32)(x * (1 << R)); + + double fr_p2 = FR2D(FR_pow2(xfr, R), R); + double fr_ex = FR2D(FR_EXP(xfr, R), R); + double fr_p10 = FR2D(FR_POW10(xfr, R), R); + + double ref_p2 = pow(2.0, x); + double ref_ex = exp(x); + double ref_p10 = pow(10.0, x); + + double ep2 = pct_err(fr_p2, ref_p2); + double eex = pct_err(fr_ex, ref_ex); + double ep10 = pct_err(fr_p10, ref_p10); + + err_add(&e_pow2, ep2); + err_add(&e_exp, eex); + err_add(&e_pow10, ep10); + + printf(" %-8.2f | %12.6f %12.6f %7.3f%% | %12.6f %12.6f %7.3f%% | %12.6f %12.6f %7.3f%%\n", + x, fr_p2, ref_p2, ep2, fr_ex, ref_ex, eex, fr_p10, ref_p10, ep10); + } + + printf("\n"); + err_print(&e_pow2, "FR_pow2 "); + err_print(&e_exp, "FR_EXP "); + err_print(&e_pow10, "FR_POW10"); + + /* -------------------------------------------------------------- */ + /* Square root: FR_sqrt */ + /* -------------------------------------------------------------- */ + section("Square root (FR_sqrt)"); + + double sqrt_inputs[] = {0.25, 0.5, 1.0, 2.0, 3.0, 4.0, 5.0, 7.0, + 9.0, 10.0, 16.0, 25.0, 50.0, 64.0, 100.0}; + int n_sqrt = (int)(sizeof(sqrt_inputs) / sizeof(sqrt_inputs[0])); + + err_stats_t e_sqrt; + err_reset(&e_sqrt); + + printf(" %-10s | %-14s %-14s %-8s\n", + "input", "FR_sqrt", "ref_sqrt", "err%"); + printf(" %-10s-+-%-14s-%-14s-%-8s\n", + "----------", "--------------", "--------------", "--------"); + + for (int i = 0; i < n_sqrt; i++) { + double x = sqrt_inputs[i]; + s32 xfr = (s32)(x * (1 << R)); + double fr_sq = FR2D(FR_sqrt(xfr, R), R); + double ref_sq = sqrt(x); + double esq = pct_err(fr_sq, ref_sq); + err_add(&e_sqrt, esq); + printf(" %-10.4f | %14.6f %14.6f %7.3f%%\n", + x, fr_sq, ref_sq, esq); + } + + printf("\n"); + err_print(&e_sqrt, "FR_sqrt "); + + printf("\n--- end ---\n"); + return 0; +} diff --git a/examples/posix-example/Makefile b/examples/posix-example/Makefile new file mode 100644 index 0000000..ebb3213 --- /dev/null +++ b/examples/posix-example/Makefile @@ -0,0 +1,29 @@ +# posix-example — self-contained build +# All artifacts stay in this directory. +# +# Usage: +# make Build the example +# make run Build and run +# make clean Remove build artifacts + +CC ?= gcc +CXX ?= g++ +SRC_DIR = ../../src + +CXXFLAGS = -I$(SRC_DIR) -Wall -Wextra -Wshadow -Os +LDFLAGS = -lm + +TARGET = FR_Math_Example1 + +.PHONY: all run clean + +all: $(TARGET) + +$(TARGET): FR_Math_Example1.cpp $(SRC_DIR)/FR_math.c $(SRC_DIR)/FR_math_2D.cpp + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ + +run: $(TARGET) + ./$(TARGET) + +clean: + rm -f $(TARGET) *.o *.gcda *.gcno diff --git a/examples/posix-example/README.md b/examples/posix-example/README.md new file mode 100644 index 0000000..0c3bb4e --- /dev/null +++ b/examples/posix-example/README.md @@ -0,0 +1,46 @@ +# POSIX Example — Comprehensive FR_Math Demo + +A full-featured desktop demo that exercises most of the FR_Math library +including fixed-point arithmetic, trig functions, error statistics, +2D matrix transforms, and formatted printing. + +## What it demonstrates + +| Topic | Functions / macros used | +|-------|----------------------| +| Overflow in 8-bit arithmetic | Raw C multiply showing wrap-around | +| Radix interpretation | `FR2D`, printing same integer at radixes 0-14 | +| Addition (with saturation) | `FR_FixAddSat` | +| Multiplication (with saturation) | `FR_FixMulSat` | +| 2D matrix transforms | `FR_Matrix2D_CPT`: translate, rotate, inverse, `XFormPtI`, `XFormPtI16` | +| Radix precision effects | Comparing radix 6 vs 11 for round-trip accuracy | +| Forward trig (optional) | `FR_CosI`, `FR_SinI`, `FR_TanI` sweep with error stats | +| Radian trig (optional) | `FR_cos` radian-native path | +| Inverse trig (optional) | `FR_acos` | +| Power / log (optional) | `FR_pow2`, `FR_EXP`, `FR_POW10` sweep | +| Formatted printing | `FR_printNumF` with `putchar` callback, `FR_CEIL`, `FR_FLOOR` | + +Several test sections are gated by flags (`gTestForwardTrig`, etc.) near +the top of the file. Edit them to enable additional sweeps. + +## Building + +```bash +make # compiles FR_Math_Example1 +make run # compiles and runs +make clean # removes build artifacts +``` + +Or compile manually: + +```bash +g++ -I../../src -Wall -Os \ + FR_Math_Example1.cpp ../../src/FR_math.c ../../src/FR_math_2D.cpp \ + -lm -o FR_Math_Example1 +``` + +## Dependencies + +- A C++ compiler (g++ or clang++) +- FR_Math source (`../../src/FR_math.c`, `../../src/FR_math.h`, `../../src/FR_defs.h`, `../../src/FR_math_2D.cpp`, `../../src/FR_math_2D.h`) +- Standard C math library (`-lm`) diff --git a/examples/trig-accuracy/Makefile b/examples/trig-accuracy/Makefile new file mode 100644 index 0000000..f81e4b0 --- /dev/null +++ b/examples/trig-accuracy/Makefile @@ -0,0 +1,57 @@ +# trig-accuracy — self-contained build +# All artifacts stay in this directory. +# +# Requires libfixmath source at ../../compare_lfm/libfixmath/libfixmath/ +# +# Usage: +# make Build the example (fails if libfixmath not found) +# make run Build and run +# make clean Remove build artifacts + +CC ?= gcc +CXX ?= g++ +SRC_DIR = ../../src +LFM_DIR = ../../compare_lfm/libfixmath/libfixmath + +CXXFLAGS = -I$(SRC_DIR) -I$(LFM_DIR) -Wall -Wextra -Wshadow -Os +CFLAGS = -I$(SRC_DIR) -I$(LFM_DIR) -Wall -Wextra -Wshadow -Os +LDFLAGS = -lm + +TARGET = trig_accuracy + +# libfixmath .c sources needed for sin/cos/tan +LFM_SRCS = $(LFM_DIR)/fix16.c $(LFM_DIR)/fix16_trig.c $(LFM_DIR)/fix16_sqrt.c $(LFM_DIR)/fix16_exp.c +LFM_OBJS = fix16.o fix16_trig.o fix16_sqrt.o fix16_exp.o + +.PHONY: all run clean check-lfm + +all: check-lfm $(TARGET) + +check-lfm: + @if [ ! -f $(LFM_DIR)/fix16.h ]; then \ + echo "ERROR: libfixmath not found at $(LFM_DIR)"; \ + echo "This example requires libfixmath source."; \ + echo "Clone it into compare_lfm/libfixmath/ from the repo root."; \ + exit 1; \ + fi + +fix16.o: $(LFM_DIR)/fix16.c + $(CC) $(CFLAGS) -c $< -o $@ + +fix16_trig.o: $(LFM_DIR)/fix16_trig.c + $(CC) $(CFLAGS) -c $< -o $@ + +fix16_sqrt.o: $(LFM_DIR)/fix16_sqrt.c + $(CC) $(CFLAGS) -c $< -o $@ + +fix16_exp.o: $(LFM_DIR)/fix16_exp.c + $(CC) $(CFLAGS) -c $< -o $@ + +$(TARGET): trig_accuracy.cpp $(SRC_DIR)/FR_math.c $(LFM_OBJS) + $(CXX) $(CXXFLAGS) trig_accuracy.cpp $(SRC_DIR)/FR_math.c $(LFM_OBJS) $(LDFLAGS) -o $@ + +run: $(TARGET) + ./$(TARGET) + +clean: + rm -f $(TARGET) *.o *.gcda *.gcno diff --git a/examples/trig-accuracy/README.md b/examples/trig-accuracy/README.md new file mode 100644 index 0000000..8406c62 --- /dev/null +++ b/examples/trig-accuracy/README.md @@ -0,0 +1,80 @@ +# Trig Accuracy — FR_Math vs libfixmath + +Head-to-head trig accuracy comparison between FR_Math and +[libfixmath](https://github.com/PetteriAimworking/libfixmath), +using IEEE 754 double-precision as the reference. + +## What it demonstrates + +- Sweeps sin, cos, and tan over 0-360 degrees in 1-degree steps +- FR_Math calls: `FR_SinI(deg)`, `FR_CosI(deg)`, `FR_TanI(deg)` (integer degrees, s15.16 output) +- libfixmath calls: `fix16_sin`, `fix16_cos`, `fix16_tan` (fix16_t radians, Q16.16 output) +- Reference: `sin()`, `cos()`, `tan()` from `` (IEEE 754 double) + +## Output tables + +**Detail table** (one row per degree): + +``` + deg | FR_sin LFM_sin ref_sin FR_err% LFM_err% | (same for cos) | (same for tan) +``` + +**Summary table**: + +``` + function | FR_max% FR_avg% | LFM_max% LFM_avg% + -----------+--------------------------+--------------------------- + sin | ... ... | ... ... + cos | ... ... | ... ... + tan | ... ... | ... ... +``` + +## Building + +This example requires the libfixmath source tree at +`../../compare_lfm/libfixmath/libfixmath/`. + +```bash +make # compiles trig_accuracy (checks for libfixmath) +make run # compiles and runs +make clean # removes build artifacts +``` + +Or compile manually: + +```bash +LFM=../../compare_lfm/libfixmath/libfixmath +g++ -I../../src -I$LFM -Wall -Os \ + trig_accuracy.cpp ../../src/FR_math.c \ + $LFM/fix16.c $LFM/fix16_trig.c $LFM/fix16_sqrt.c $LFM/fix16_exp.c \ + -lm -o trig_accuracy +``` + +## Expected output + +``` +FR_Math vs libfixmath — Trig Accuracy Comparison (v...) + + deg | FR_sin LFM_sin ref_sin ... + ----+-------------------------------... + 0 | 0.00000 0.00000 0.00000 ... + 1 | 0.01745 0.01745 0.01745 ... + ... + 360 | 0.00000 0.00000 -0.00000 ... + + ============================================================ + Summary + ============================================================ + + function | FR_max% FR_avg% | LFM_max% LFM_avg% + ... + +--- end --- +``` + +## Dependencies + +- A C++ compiler (g++ or clang++) +- FR_Math source (`../../src/FR_math.c`, `../../src/FR_math.h`, `../../src/FR_defs.h`) +- libfixmath source at `../../compare_lfm/libfixmath/libfixmath/` +- Standard C math library (`-lm`) diff --git a/examples/trig-accuracy/trig_accuracy b/examples/trig-accuracy/trig_accuracy new file mode 100755 index 0000000..828f4ac Binary files /dev/null and b/examples/trig-accuracy/trig_accuracy differ diff --git a/examples/trig-accuracy/trig_accuracy.cpp b/examples/trig-accuracy/trig_accuracy.cpp new file mode 100644 index 0000000..189367a --- /dev/null +++ b/examples/trig-accuracy/trig_accuracy.cpp @@ -0,0 +1,139 @@ +/** + * trig_accuracy.cpp — FR_math vs libfixmath vs IEEE double trig comparison + * + * Sweeps 0-360 degrees in 1-degree steps for sin, cos, tan. + * Prints a per-degree detail table and a summary table. + * + * Requires libfixmath source at compare_lfm/libfixmath/libfixmath/. + * Build: make ex_trig_accuracy (only built if libfixmath is present) + * Run: ./build/ex_trig_accuracy + * + * Copyright (C) 2001-2026 M. A. Chatterjee — zlib license (see FR_math.h) + */ + +#include +#include + +#include "FR_defs.h" +#include "FR_math.h" +#include "fixmath.h" + +static double pct_err(double measured, double ref) +{ + if (fabs(ref) < 1e-12) + return fabs(measured) * 100.0; + return ((measured - ref) / ref) * 100.0; +} + +typedef struct { + double max_abs_pct; + double sum_abs_pct; + int n; +} err_stats_t; + +static void err_reset(err_stats_t *e) { e->max_abs_pct = 0; e->sum_abs_pct = 0; e->n = 0; } + +static void err_add(err_stats_t *e, double pct) +{ + double a = fabs(pct); + if (a > e->max_abs_pct) e->max_abs_pct = a; + e->sum_abs_pct += a; + e->n++; +} + +/* ================================================================== */ +int main() +{ + printf("FR_Math vs libfixmath — Trig Accuracy Comparison (v%s)\n\n", FR_MATH_VERSION); + + err_stats_t fr_sin_e, fr_cos_e, fr_tan_e; + err_stats_t lf_sin_e, lf_cos_e, lf_tan_e; + err_reset(&fr_sin_e); err_reset(&fr_cos_e); err_reset(&fr_tan_e); + err_reset(&lf_sin_e); err_reset(&lf_cos_e); err_reset(&lf_tan_e); + + /* Header */ + printf(" deg | FR_sin LFM_sin ref_sin FR_err%% LFM_err%%" + " | FR_cos LFM_cos ref_cos FR_err%% LFM_err%%" + " | FR_tan LFM_tan ref_tan FR_err%% LFM_err%%\n"); + printf(" ----+"); + for (int g = 0; g < 3; g++) printf("--------------------------------------------------------------+"); + printf("\n"); + + for (int deg = 0; deg <= 360; deg++) { + double rad_d = deg * M_PI / 180.0; + + /* IEEE double reference */ + double ref_s = sin(rad_d); + double ref_c = cos(rad_d); + double ref_t = tan(rad_d); + + /* FR_math: integer-degree API, result is s15.16 */ + double fr_s = (double)FR_SinI(deg) / 65536.0; + double fr_c = (double)FR_CosI(deg) / 65536.0; + double fr_t = (double)FR_TanI(deg) / 65536.0; + + /* libfixmath: convert degrees to radians as fix16_t */ + fix16_t lf_rad = fix16_from_dbl(rad_d); + double lf_s = fix16_to_dbl(fix16_sin(lf_rad)); + double lf_c = fix16_to_dbl(fix16_cos(lf_rad)); + double lf_t = fix16_to_dbl(fix16_tan(lf_rad)); + + /* Error calculation */ + double fse = pct_err(fr_s, ref_s); + double fce = pct_err(fr_c, ref_c); + double fte = pct_err(fr_t, ref_t); + double lse = pct_err(lf_s, ref_s); + double lce = pct_err(lf_c, ref_c); + double lte = pct_err(lf_t, ref_t); + + err_add(&fr_sin_e, fse); err_add(&fr_cos_e, fce); err_add(&fr_tan_e, fte); + err_add(&lf_sin_e, lse); err_add(&lf_cos_e, lce); err_add(&lf_tan_e, lte); + + /* Clamp tan display for readability near poles */ + int tan_pole = (fabs(ref_t) > 1000.0) ? 1 : 0; + + printf(" %3d |", deg); + printf(" %8.5f %8.5f %8.5f %8.3f%% %8.3f%%", + fr_s, lf_s, ref_s, fse, lse); + printf(" |"); + printf(" %8.5f %8.5f %8.5f %8.3f%% %8.3f%%", + fr_c, lf_c, ref_c, fce, lce); + printf(" |"); + if (tan_pole) + printf(" %10.1f %10.1f %10.1f (pole) (pole)", + fr_t, lf_t, ref_t); + else + printf(" %10.5f %10.5f %10.5f %9.3f%% %9.3f%%", + fr_t, lf_t, ref_t, fte, lte); + printf("\n"); + } + + /* Summary table */ + printf("\n ============================================================\n"); + printf(" Summary\n"); + printf(" ============================================================\n\n"); + printf(" %-10s | %12s %12s | %12s %12s\n", + "function", "FR_max%", "FR_avg%", "LFM_max%", "LFM_avg%"); + printf(" %-10s-+-%-12s-%-12s-+-%-12s-%-12s\n", + "----------", "------------", "------------", + "------------", "------------"); + + #define SUMMARY_ROW(name, fr_e, lf_e) \ + printf(" %-10s | %11.4f%% %11.4f%% | %11.4f%% %11.4f%%\n", \ + name, \ + (fr_e).max_abs_pct, \ + (fr_e).n > 0 ? (fr_e).sum_abs_pct / (fr_e).n : 0.0, \ + (lf_e).max_abs_pct, \ + (lf_e).n > 0 ? (lf_e).sum_abs_pct / (lf_e).n : 0.0) + + SUMMARY_ROW("sin", fr_sin_e, lf_sin_e); + SUMMARY_ROW("cos", fr_cos_e, lf_cos_e); + SUMMARY_ROW("tan", fr_tan_e, lf_tan_e); + + printf("\n FR_math: FR_SinI/FR_CosI/FR_TanI (integer degrees, s15.16 output)\n"); + printf(" libfixmath: fix16_sin/cos/tan (fix16_t radians, Q16.16 output)\n"); + printf(" Reference: IEEE 754 double sin/cos/tan\n"); + + printf("\n--- end ---\n"); + return 0; +} diff --git a/examples/trig-functions/README.md b/examples/trig-functions/README.md new file mode 100644 index 0000000..bf5b31f --- /dev/null +++ b/examples/trig-functions/README.md @@ -0,0 +1,55 @@ +# Trig Functions — Arduino Example + +Demonstrates sin, cos, tan, atan2, and angle-unit conversions using +integer-only fixed-point math on Arduino. + +## What it demonstrates + +| Feature | Functions / macros | +|---------|-------------------| +| Integer-degree trig | `FR_CosI`, `FR_SinI` (s15.16 output) | +| BAM angle conversion | `FR_DEG2BAM`, `fr_cos_bam`, `fr_sin_bam` | +| Radian-native trig | `fr_cos`, `fr_sin`, `fr_tan` (arbitrary input radix) | +| Inverse trig | `FR_atan2`, `FR_acos` | +| Angle conversion | `FR_DEG2RAD` (shift-only, no multiply) | + +## Hardware + +Any Arduino board with a serial port. Output at 9600 baud. + +## Building + +**Arduino IDE**: Open `trig-functions.ino` from **File > Examples > FR_Math > trig-functions**. + +**Arduino CLI**: + +```bash +arduino-cli compile --fqbn arduino:avr:uno examples/trig-functions +arduino-cli upload --fqbn arduino:avr:uno -p /dev/ttyACM0 examples/trig-functions +arduino-cli monitor -p /dev/ttyACM0 --config baudrate=9600 +``` + +## Expected serial output + +``` +=== FR_Math Trigonometry === + +Integer-degree API (s15.16 output): + cos(0) = 65536 sin(0) = 0 + cos(45) = 46341 sin(45) = 46340 + cos(90) = 0 sin(90) = 65536 + ... + +BAM API: + 60 deg -> BAM = 10922 + cos_bam(60) = 32768 + sin_bam(60) = 56756 + +Radian API (radix 12): + cos(1 rad) = 35419 + sin(1 rad) = 55117 + tan(1 rad) = 101994 + ... + +Done. +``` diff --git a/examples/wave-generators/README.md b/examples/wave-generators/README.md new file mode 100644 index 0000000..a262185 --- /dev/null +++ b/examples/wave-generators/README.md @@ -0,0 +1,57 @@ +# Wave Generators — Arduino Example + +Demonstrates square, triangle, sawtooth, PWM, morphing triangle, +noise, and ADSR envelope generators on Arduino — all integer-only. + +## What it demonstrates + +| Feature | Functions / macros | +|---------|-------------------| +| Phase increment | `FR_HZ2BAM_INC` (Hz + sample rate to BAM step) | +| Square wave | `fr_wave_sqr` | +| Triangle wave | `fr_wave_tri` | +| Sawtooth wave | `fr_wave_saw` | +| PWM (variable duty) | `fr_wave_pwm` | +| Morphing triangle | `fr_wave_tri_morph` | +| LFSR noise | `fr_wave_noise` | +| ADSR envelope | `fr_adsr_init`, `fr_adsr_trigger`, `fr_adsr_step`, `fr_adsr_release` | + +All wave functions take a `u16` BAM phase and return `s16` in [-32767, +32767]. +The ADSR envelope returns `s16` in [0, 32767] (unipolar). + +## Hardware + +Any Arduino board with a serial port. Output at 9600 baud. + +## Building + +**Arduino IDE**: Open `wave-generators.ino` from **File > Examples > FR_Math > wave-generators**. + +**Arduino CLI**: + +```bash +arduino-cli compile --fqbn arduino:avr:uno examples/wave-generators +arduino-cli upload --fqbn arduino:avr:uno -p /dev/ttyACM0 examples/wave-generators +arduino-cli monitor -p /dev/ttyACM0 --config baudrate=9600 +``` + +## Expected serial output + +``` +=== FR_Math Wave Generators === + +440 Hz @ 8 kHz -> phase_inc = 3604 + +phase sqr tri saw pwm75 morph +0 32767 0 -32767 32767 0 +4096 32767 8192 -24575 32767 32767 +... + +Noise (10 samples): +16214 -8277 32725 ... + +ADSR envelope (attack=100, decay=200, sustain=0.75, release=400): +327 654 981 ... + +Done. +``` diff --git a/examples/waveform-synth/Makefile b/examples/waveform-synth/Makefile new file mode 100644 index 0000000..33a36a6 --- /dev/null +++ b/examples/waveform-synth/Makefile @@ -0,0 +1,33 @@ +# waveform-synth — self-contained build +# All artifacts stay in this directory. +# +# Usage: +# make Build the example +# make run Build and run (ASCII art mode) +# make run-csv Build and run (CSV output) +# make clean Remove build artifacts + +CC ?= gcc +CXX ?= g++ +SRC_DIR = ../../src + +CXXFLAGS = -I$(SRC_DIR) -Wall -Wextra -Wshadow -Os +LDFLAGS = -lm + +TARGET = waveform_synth + +.PHONY: all run run-csv clean + +all: $(TARGET) + +$(TARGET): waveform_synth.cpp $(SRC_DIR)/FR_math.c + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ + +run: $(TARGET) + ./$(TARGET) + +run-csv: $(TARGET) + ./$(TARGET) --csv + +clean: + rm -f $(TARGET) *.o *.gcda *.gcno diff --git a/examples/waveform-synth/README.md b/examples/waveform-synth/README.md new file mode 100644 index 0000000..da8e8ae --- /dev/null +++ b/examples/waveform-synth/README.md @@ -0,0 +1,76 @@ +# Waveform Synth + +Generates waveforms using FR_Math's wave generators and ADSR envelope, +with both ASCII art visualization and CSV output modes. + +## What it demonstrates + +| Feature | Functions used | +|---------|--------------| +| Square wave | `fr_wave_sqr` | +| Triangle wave | `fr_wave_tri` | +| Sawtooth wave | `fr_wave_saw` | +| PWM (75% duty) | `fr_wave_pwm` | +| Sine wave | `fr_sin_bam` | +| LFSR noise | `fr_wave_noise` | +| ADSR envelope | `fr_adsr_init`, `fr_adsr_trigger`, `fr_adsr_step`, `fr_adsr_release` | +| Amplitude modulation | Sine wave multiplied by ADSR envelope | + +## Building + +```bash +make # compiles waveform_synth +make run # compiles and runs (ASCII art) +make run-csv # compiles and runs (CSV output) +make clean # removes build artifacts +``` + +Or compile manually: + +```bash +g++ -I../../src -Wall -Os waveform_synth.cpp ../../src/FR_math.c -lm -o waveform_synth +``` + +## Running + +**ASCII art mode** (default): + +```bash +./waveform_synth +``` + +Renders each waveform as a 64-column x 21-row ASCII plot, plus the +ADSR envelope and a combined sin * ADSR amplitude-modulation demo. + +**CSV mode**: + +```bash +./waveform_synth --csv +``` + +Outputs CSV with columns: `sample,sqr,tri,saw,pwm,sin,noise,envelope,combined`. +Suitable for importing into a spreadsheet or plotting tool. + +## Expected output (ASCII mode) + +``` +FR_Math — Waveform Synth Demo (v...) + 256 samples/cycle, BAM increment = 256 + + Square (fr_wave_sqr) (256 samples, showing 64 columns) + +max |********... | + 0 | | + -max | ...************************| + + Triangle (fr_wave_tri) ... + ... + Sin * ADSR (amplitude modulation) ... + +--- end --- +``` + +## Dependencies + +- A C++ compiler (g++ or clang++) +- FR_Math source (`../../src/FR_math.c`, `../../src/FR_math.h`, `../../src/FR_defs.h`) +- Standard C math library (`-lm`) diff --git a/examples/waveform-synth/waveform_synth b/examples/waveform-synth/waveform_synth new file mode 100755 index 0000000..815fdce Binary files /dev/null and b/examples/waveform-synth/waveform_synth differ diff --git a/examples/waveform-synth/waveform_synth.cpp b/examples/waveform-synth/waveform_synth.cpp new file mode 100644 index 0000000..988433e --- /dev/null +++ b/examples/waveform-synth/waveform_synth.cpp @@ -0,0 +1,206 @@ +/** + * waveform_synth.cpp — Waveform generation with ASCII art and CSV output + * + * Default (no args): ASCII art rendering of all waveforms + ADSR demo + * --csv flag: CSV output (columns: sample, sqr, tri, saw, pwm, sin, noise, envelope) + * + * Build: make ex_waveform + * Run: ./build/ex_waveform (ASCII art) + * ./build/ex_waveform --csv (CSV output) + * + * Copyright (C) 2001-2026 M. A. Chatterjee — zlib license (see FR_math.h) + */ + +#include +#include + +#include "FR_defs.h" +#include "FR_math.h" + +#define NUM_SAMPLES 256 +#define ROWS 21 +#define COLS 64 +#define BAM_INC (65536 / NUM_SAMPLES) /* one full cycle in 256 samples */ +#define PWM_DUTY 49152 /* 75% duty = 49152/65536 */ + +/* Map s16 [-32767, +32767] to row [0, ROWS-1]. Top row = +32767. */ +static int val_to_row(s16 v) +{ + int row = (int)(ROWS - 1) - (int)(((long)v + 32767L) * (ROWS - 1) / 65534L); + if (row < 0) row = 0; + if (row >= ROWS) row = ROWS - 1; + return row; +} + +/* Subsample: pick COLS points from NUM_SAMPLES evenly */ +static int sample_index(int col) +{ + return (col * NUM_SAMPLES) / COLS; +} + +/* Print one ASCII waveform */ +static void ascii_wave(const char *title, s16 *buf, int n) +{ + printf("\n %s (%d samples, showing %d columns)\n", title, n, COLS); + + /* Build a character grid */ + char grid[ROWS][COLS + 1]; + for (int r = 0; r < ROWS; r++) { + for (int c = 0; c < COLS; c++) + grid[r][c] = ' '; + grid[r][COLS] = '\0'; + } + + /* Place asterisks */ + for (int c = 0; c < COLS; c++) { + int idx = sample_index(c); + if (idx >= n) idx = n - 1; + int r = val_to_row(buf[idx]); + grid[r][c] = '*'; + } + + /* Draw with axis labels */ + for (int r = 0; r < ROWS; r++) { + const char *label = ""; + if (r == 0) label = "+max"; + else if (r == ROWS / 2) label = " 0"; + else if (r == ROWS - 1) label = "-max"; + printf(" %5s |%s|\n", label, grid[r]); + } +} + +/* Print ASCII for ADSR envelope (0..32767 unipolar) */ +static void ascii_envelope(const char *title, s16 *buf, int n) +{ + printf("\n %s (%d samples, showing %d columns)\n", title, n, COLS); + + char grid[ROWS][COLS + 1]; + for (int r = 0; r < ROWS; r++) { + for (int c = 0; c < COLS; c++) + grid[r][c] = ' '; + grid[r][COLS] = '\0'; + } + + for (int c = 0; c < COLS; c++) { + int idx = sample_index(c); + if (idx >= n) idx = n - 1; + /* Envelope is 0..32767; scale to full grid: treat 0 as -32767 for display */ + s16 v = (s16)(buf[idx] * 2 - 32767); + int r = val_to_row(v); + grid[r][c] = '*'; + } + + for (int r = 0; r < ROWS; r++) { + const char *label = ""; + if (r == 0) label = " 1.0"; + else if (r == ROWS / 2) label = " 0.5"; + else if (r == ROWS - 1) label = " 0.0"; + printf(" %5s |%s|\n", label, grid[r]); + } +} + +/* ================================================================== */ +int main(int argc, char *argv[]) +{ + int csv_mode = 0; + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "--csv") == 0) csv_mode = 1; + } + + /* Generate waveform buffers */ + s16 buf_sqr[NUM_SAMPLES]; + s16 buf_tri[NUM_SAMPLES]; + s16 buf_saw[NUM_SAMPLES]; + s16 buf_pwm[NUM_SAMPLES]; + s16 buf_sin[NUM_SAMPLES]; + s16 buf_noise[NUM_SAMPLES]; + + u32 noise_state = 0xDEADBEEF; + u16 phase = 0; + + for (int i = 0; i < NUM_SAMPLES; i++) { + buf_sqr[i] = fr_wave_sqr(phase); + buf_tri[i] = fr_wave_tri(phase); + buf_saw[i] = fr_wave_saw(phase); + buf_pwm[i] = fr_wave_pwm(phase, PWM_DUTY); + buf_sin[i] = (s16)(fr_sin_bam(phase) >> 1); /* s15.16 -> s15 approx */ + buf_noise[i] = fr_wave_noise(&noise_state); + phase += BAM_INC; + } + + /* ADSR envelope demo */ + #define ENV_TOTAL 512 + s16 buf_env[ENV_TOTAL]; + + fr_adsr_t env; + fr_adsr_init(&env, + 64, /* attack samples */ + 32, /* decay samples */ + 16384, /* sustain level s0.15 (50%) */ + 64 /* release samples */ + ); + + fr_adsr_trigger(&env); + int release_at = 256; + for (int i = 0; i < ENV_TOTAL; i++) { + if (i == release_at) + fr_adsr_release(&env); + buf_env[i] = fr_adsr_step(&env); + } + + /* Combined: sin * envelope (amplitude modulation) */ + #define COMBINED_LEN ENV_TOTAL + s16 buf_combined[COMBINED_LEN]; + phase = 0; + for (int i = 0; i < COMBINED_LEN; i++) { + s32 sin_val = fr_sin_bam(phase); /* s15.16 */ + s32 env_val = (s32)buf_env[i]; /* 0..32767 (s0.15) */ + /* Multiply: (s15.16 * s0.15) >> 15 = s15.16, then >> 1 for s15 */ + s32 combined = (sin_val * env_val) >> 16; + if (combined > 32767) combined = 32767; + if (combined < -32767) combined = -32767; + buf_combined[i] = (s16)combined; + phase += BAM_INC; + } + + if (csv_mode) { + /* CSV header */ + printf("sample,sqr,tri,saw,pwm,sin,noise,envelope,combined\n"); + + int max_len = COMBINED_LEN; + for (int i = 0; i < max_len; i++) { + printf("%d", i); + printf(",%d", i < NUM_SAMPLES ? buf_sqr[i] : 0); + printf(",%d", i < NUM_SAMPLES ? buf_tri[i] : 0); + printf(",%d", i < NUM_SAMPLES ? buf_saw[i] : 0); + printf(",%d", i < NUM_SAMPLES ? buf_pwm[i] : 0); + printf(",%d", i < NUM_SAMPLES ? buf_sin[i] : 0); + printf(",%d", i < NUM_SAMPLES ? buf_noise[i] : 0); + printf(",%d", buf_env[i]); + printf(",%d", buf_combined[i]); + printf("\n"); + } + } else { + printf("FR_Math — Waveform Synth Demo (v%s)\n", FR_MATH_VERSION); + printf(" %d samples/cycle, BAM increment = %d\n", NUM_SAMPLES, BAM_INC); + + ascii_wave("Square (fr_wave_sqr)", buf_sqr, NUM_SAMPLES); + ascii_wave("Triangle (fr_wave_tri)", buf_tri, NUM_SAMPLES); + ascii_wave("Sawtooth (fr_wave_saw)", buf_saw, NUM_SAMPLES); + ascii_wave("PWM 75%% (fr_wave_pwm)", buf_pwm, NUM_SAMPLES); + ascii_wave("Sine (fr_sin_bam)", buf_sin, NUM_SAMPLES); + ascii_wave("Noise (fr_wave_noise)", buf_noise, NUM_SAMPLES); + + printf("\n ADSR params: attack=64, decay=32, sustain=50%%, release=64\n"); + printf(" Trigger at sample 0, release at sample %d, total %d samples\n", + release_at, ENV_TOTAL); + ascii_envelope("ADSR Envelope (fr_adsr)", buf_env, ENV_TOTAL); + + ascii_wave("Sin * ADSR (amplitude modulation)", buf_combined, COMBINED_LEN); + + printf("\n Tip: run with --csv to get machine-readable output\n"); + } + + printf("\n--- end ---\n"); + return 0; +} diff --git a/idf_component.yml b/idf_component.yml index 6a0d030..8097972 100644 --- a/idf_component.yml +++ b/idf_component.yml @@ -1,4 +1,4 @@ -version: "2.0.7" +version: "2.0.8" description: "Compact fixed-point math library for embedded systems. Integer-only with caller-selectable radix. Trig, log/exp, sqrt, hypot, wave generators, ADSR, and 2D transforms. Zero dependencies." url: "https://github.com/deftio/fr_math" repository: "https://github.com/deftio/fr_math.git" diff --git a/keywords.txt b/keywords.txt index 1ab2703..3dc5590 100644 --- a/keywords.txt +++ b/keywords.txt @@ -14,9 +14,9 @@ fr_adsr_t KEYWORD1 FR_FixMuls KEYWORD2 FR_FixMulSat KEYWORD2 FR_FixAddSat KEYWORD2 -FR_Cos KEYWORD2 -FR_Sin KEYWORD2 -FR_Tan KEYWORD2 +fr_sin_deg KEYWORD2 +fr_cos_deg KEYWORD2 +fr_tan_deg KEYWORD2 FR_TanI KEYWORD2 FR_acos KEYWORD2 FR_asin KEYWORD2 @@ -35,6 +35,7 @@ FR_printNumH KEYWORD2 FR_numstr KEYWORD2 fr_cos_bam KEYWORD2 fr_sin_bam KEYWORD2 +fr_tan_bam KEYWORD2 fr_cos KEYWORD2 fr_sin KEYWORD2 fr_tan KEYWORD2 @@ -99,3 +100,6 @@ FR_SLOG2E LITERAL1 FR_SrLOG2E LITERAL1 FR_SLOG2_10 LITERAL1 FR_SrLOG2_10 LITERAL1 +FR_Cos LITERAL1 +FR_Sin LITERAL1 +FR_Tan LITERAL1 diff --git a/library.json b/library.json index 495a89f..17f9649 100644 --- a/library.json +++ b/library.json @@ -1,6 +1,6 @@ { "name": "FR_Math", - "version": "2.0.7", + "version": "2.0.8", "description": "Compact fixed-point math library for embedded systems. Integer-only with caller-selectable radix. Trig, log/exp, sqrt, hypot, wave generators, ADSR, and 2D transforms in 4KB of flash. Zero dependencies.", "keywords": [ "fixed-point", diff --git a/library.properties b/library.properties index cd2d953..47dde32 100644 --- a/library.properties +++ b/library.properties @@ -1,5 +1,5 @@ name=FR_Math -version=2.0.7 +version=2.0.8 author=M. A. Chatterjee maintainer=M. A. Chatterjee sentence=Compact fixed-point math library for embedded systems. 4KB flash, zero dependencies, any radix. diff --git a/llms.txt b/llms.txt index a6d254f..5930e4d 100644 --- a/llms.txt +++ b/llms.txt @@ -9,7 +9,7 @@ or libraries. Pure C99, zero dependencies beyond ``. - Repository: https://github.com/deftio/fr_math - Documentation: https://deftio.github.io/fr_math/ - License: BSD-2-Clause -- Version: 2.0.7 +- Version: 2.0.8 ## Key concept: radix parameter @@ -25,9 +25,8 @@ Common radix choices: ## Source files - `src/FR_math.h` — all public declarations, macros, constants -- `src/FR_math.c` — all function implementations (~42KB) +- `src/FR_math.c` — all function implementations (trig tables inlined, ~45KB) - `src/FR_defs.h` — type aliases: s8, s16, s32, u8, u16, u32 -- `src/FR_trig_table.h` — precomputed sine table (256 entries) - `src/FR_math_2D.h` / `src/FR_math_2D.cpp` — optional 2D transform class (C++) ## Types @@ -82,6 +81,7 @@ s32 fr_tan(s32 rad, u16 radix); // BAM (Binary Angle Measurement) — u16 where 65536 = 360 degrees: s32 fr_cos_bam(u16 bam); s32 fr_sin_bam(u16 bam); +s32 fr_tan_bam(u16 bam); // Degree API at any radix: s32 FR_Cos(s16 deg, u16 radix); @@ -164,20 +164,24 @@ s32 ex = FR_EXP(I2FR(1, R), R); // e^1 ## Building ```bash -make lib # build static library objects -make test # run all test suites -make examples # build example program -make clean # remove build artifacts +make lib # build static library objects +make test # run all test suites +make examples # build example programs +make size-report # cross-compile size report (Docker) +make size-update # size report + patch doc files +make clean # remove build artifacts ``` ## Lean build options Define before including FR_math.h to exclude optional subsystems: +- `FR_CORE_ONLY` — shorthand for FR_NO_PRINT + FR_NO_WAVES (~1.9 KB saved) - `FR_NO_PRINT` — removes FR_printNumF/D/H and FR_numstr (~1.3 KB saved) - `FR_NO_WAVES` — removes fr_wave_*, fr_adsr_*, FR_HZ2BAM_INC (~0.6 KB saved) ## Platform support Tested on: AVR (Arduino), ARM Cortex-M0/M4, ESP32 (Xtensa), RISC-V, -x86/x64, MSP430, 68k, 8051. Code size is 4-8KB at -Os on 32-bit targets. +x86/x64, MSP430, m68k, PowerPC, MIPS32, 68HC11. +Code size is 3-9 KB at -Os on 32-bit targets (Lean to Full). diff --git a/makefile b/makefile index eb8a9fc..196e127 100644 --- a/makefile +++ b/makefile @@ -36,7 +36,8 @@ help: @echo "Build targets:" @echo " all Build library and examples" @echo " lib Build library objects only" - @echo " examples Build example program" + @echo " examples Build all example programs" + @echo " run-examples Build and run all desktop examples" @echo "" @echo "Test targets:" @echo " test Run all tests" @@ -54,9 +55,14 @@ help: @echo " coverage Generate coverage report (gcov)" @echo " coverage-basic Basic coverage info without lcov" @echo " coverage-html HTML coverage report (requires lcov)" - @echo " size-report Multi-architecture size report" + @echo " size-report Multi-architecture size report (Docker)" + @echo " size-update Size report + patch doc files" @echo " size-simple Size report for current platform" @echo "" + @echo "Tools:" + @echo " tools Build diagnostic tools" + @echo " trig-neighborhood Build function neighborhood explorer" + @echo "" @echo "Maintenance:" @echo " clean Remove build artifacts" @echo " cleanall Remove build artifacts and backups" @@ -87,11 +93,38 @@ $(BUILD_DIR)/FR_math_2D.o: $(SRC_DIR)/FR_math_2D.cpp $(HEADERS) # Build examples .PHONY: examples -examples: dirs $(BUILD_DIR)/fr_example +examples: dirs $(BUILD_DIR)/fr_example ex-basics ex-logexp ex-waveform ex-trig-accuracy $(BUILD_DIR)/fr_example: $(EXAMPLE_DIR)/posix-example/FR_Math_Example1.cpp $(BUILD_DIR)/FR_math.o $(BUILD_DIR)/FR_math_2D.o $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@ +# Self-contained desktop examples (each has its own Makefile) +.PHONY: ex-basics ex-logexp ex-waveform ex-trig-accuracy run-examples + +ex-basics: + @$(MAKE) -C $(EXAMPLE_DIR)/fixed-point-basics + +ex-logexp: + @$(MAKE) -C $(EXAMPLE_DIR)/log-exp-curves + +ex-waveform: + @$(MAKE) -C $(EXAMPLE_DIR)/waveform-synth + +ex-trig-accuracy: + @if [ -f compare_lfm/libfixmath/libfixmath/fix16.h ]; then \ + $(MAKE) -C $(EXAMPLE_DIR)/trig-accuracy; \ + else \ + echo "Skipping trig-accuracy (libfixmath not found)"; \ + fi + +run-examples: examples + @echo ""; echo "=== fixed-point-basics ===" ; $(MAKE) -s -C $(EXAMPLE_DIR)/fixed-point-basics run + @echo ""; echo "=== log-exp-curves ===" ; $(MAKE) -s -C $(EXAMPLE_DIR)/log-exp-curves run + @echo ""; echo "=== waveform-synth ===" ; $(MAKE) -s -C $(EXAMPLE_DIR)/waveform-synth run + @if [ -f compare_lfm/libfixmath/libfixmath/fix16.h ]; then \ + echo ""; echo "=== trig-accuracy ===" ; $(MAKE) -s -C $(EXAMPLE_DIR)/trig-accuracy run; \ + fi + # Build and run tests .PHONY: test test: dirs examples test-basic test-comprehensive test-2d test-overflow test-full test-2d-complete test-tdd @@ -192,10 +225,15 @@ coverage-html: clean dirs @echo "HTML report: $(COV_DIR)/html/index.html" @genhtml $(COV_DIR)/coverage.info --output-directory $(COV_DIR)/html -# Size report - multi-architecture +# Size report - multi-architecture (Docker cross-compilation) .PHONY: size-report size-report: dirs - @scripts/size_report.sh + @scripts/crossbuild_sizes.sh + +# Size report + patch doc files +.PHONY: size-update +size-update: dirs + @scripts/crossbuild_sizes.sh --update # Simple size report for current platform .PHONY: size-simple @@ -211,14 +249,62 @@ size-simple: lib ls -lh $(BUILD_DIR)/*.o; \ fi +# Lean build: only functions with libfixmath equivalents (radian trig, +# inverse trig, sqrt, log2, ln, exp, mul/div — no degree trig, no BAM +# tan, no waves, no hypot exact, no log10). +.PHONY: size-lean +size-lean: dirs + @echo "=== LEAN Build (FR_LEAN — libfixmath-equivalent API only) ===" + @$(CC) -I$(SRC_DIR) $(LIB_WARN) -DFR_LEAN -DFR_NO_PRINT -Os -c $(SRC_DIR)/FR_math.c -o $(BUILD_DIR)/FR_math_lean.o + @size $(BUILD_DIR)/FR_math_lean.o + @echo "" + +# Full build: everything (default — all trig, waves, ADSR, print, etc.) +.PHONY: size-full +size-full: dirs + @echo "=== FULL Build (all features) ===" + @$(CC) -I$(SRC_DIR) $(LIB_WARN) -Os -c $(SRC_DIR)/FR_math.c -o $(BUILD_DIR)/FR_math_full.o + @size $(BUILD_DIR)/FR_math_full.o + @echo "" + +# Side-by-side lean vs full size comparison +.PHONY: size-compare +size-compare: size-lean size-full + @echo "=== Lean vs Full Comparison ===" + @LEAN=$$(size $(BUILD_DIR)/FR_math_lean.o | tail -1 | awk '{print $$1}'); \ + FULL=$$(size $(BUILD_DIR)/FR_math_full.o | tail -1 | awk '{print $$1}'); \ + echo " Lean text: $${LEAN} bytes"; \ + echo " Full text: $${FULL} bytes" + +# Tools +TOOLS_DIR = tools + +.PHONY: tools +tools: dirs trig-neighborhood + +.PHONY: trig-neighborhood +trig-neighborhood: $(BUILD_DIR)/trig_neighborhood + +$(BUILD_DIR)/trig_neighborhood: $(TOOLS_DIR)/trig_neighborhood.cpp $(SRC_DIR)/FR_math.c $(HEADERS) + $(CC) -I$(SRC_DIR) $(LIB_WARN) -Os -c $(SRC_DIR)/FR_math.c -o $(BUILD_DIR)/tool_FR_math.o + $(CXX) $(CXXFLAGS) $(TOOLS_DIR)/trig_neighborhood.cpp $(BUILD_DIR)/tool_FR_math.o $(LDFLAGS) -o $@ + # Clean .PHONY: clean clean: rm -rf $(BUILD_DIR) $(COV_DIR) - rm -f *.o *.gcda *.gcno *.exe *.info + rm -f *.o *.gcda *.gcno *.gcov *.exe *.info + +.PHONY: clean-examples +clean-examples: + @$(MAKE) -s -C $(EXAMPLE_DIR)/posix-example clean 2>/dev/null || true + @$(MAKE) -s -C $(EXAMPLE_DIR)/fixed-point-basics clean 2>/dev/null || true + @$(MAKE) -s -C $(EXAMPLE_DIR)/log-exp-curves clean 2>/dev/null || true + @$(MAKE) -s -C $(EXAMPLE_DIR)/waveform-synth clean 2>/dev/null || true + @$(MAKE) -s -C $(EXAMPLE_DIR)/trig-accuracy clean 2>/dev/null || true .PHONY: cleanall -cleanall: clean +cleanall: clean clean-examples rm -f *~ $(SRC_DIR)/*~ $(TEST_DIR)/*~ # Basic coverage info without lcov @@ -233,7 +319,7 @@ coverage-basic: clean dirs @echo "" @echo "=== Basic Coverage Info ===" @if command -v gcov >/dev/null 2>&1; then \ - gcov $(SRC_DIR)/FR_math.c -o $(BUILD_DIR) | grep -E "File|Lines executed"; \ + cd $(BUILD_DIR) && gcov FR_math.o | grep -E "File|Lines executed"; \ echo ""; \ echo "For detailed coverage report, install lcov and run: make coverage"; \ else \ diff --git a/pages/assets/site.js b/pages/assets/site.js index a686d8e..0a83734 100644 --- a/pages/assets/site.js +++ b/pages/assets/site.js @@ -4,6 +4,9 @@ into every page so there's exactly one source of truth for the site title, version, menu, and legal line. + Version is loaded from version.json (generated by sync_version.sh) + so there is no hardcoded version string in this file. + Page skeleton expected in each HTML file: @@ -16,8 +19,6 @@ ════════════════════════════════════════════════════════════════════ */ (function () { - var FR_VERSION = 'v2.0.7'; - // Detect whether we're a top-level page or inside guide/. // Works for both file:// and http(s):// because we look for the // literal "/guide/" segment anywhere in the path. @@ -43,7 +44,7 @@ ]; // ----------------------------------------------------------------- - // Build header + // Build header (version placeholder filled in after fetch) // ----------------------------------------------------------------- var headerEl = document.getElementById('site-header'); if (headerEl) { @@ -60,12 +61,34 @@ headerEl.innerHTML = ''; } + // ----------------------------------------------------------------- + // Load version from version.json (generated by sync_version.sh) + // Falls back gracefully: file:// or missing file just hides the tag. + // ----------------------------------------------------------------- + var versionUrl = prefix + 'version.json'; + try { + var xhr = new XMLHttpRequest(); + xhr.open('GET', versionUrl, true); + xhr.onload = function () { + if (xhr.status === 200 || xhr.status === 0) { // 0 for file:// + try { + var data = JSON.parse(xhr.responseText); + var el = document.getElementById('site-version'); + if (el && data.version) { + el.textContent = 'v' + data.version; + } + } catch (e) { /* malformed JSON — leave blank */ } + } + }; + xhr.send(); + } catch (e) { /* XMLHttpRequest blocked (e.g. file:// CORS) — leave blank */ } + // ----------------------------------------------------------------- // Build footer // ----------------------------------------------------------------- diff --git a/pages/guide/api-reference.html b/pages/guide/api-reference.html index 5c03c66..fb67159 100644 --- a/pages/guide/api-reference.html +++ b/pages/guide/api-reference.html @@ -5,7 +5,7 @@ API Reference — FR_Math - + @@ -18,7 +18,7 @@

API Reference

Every public symbol, grouped by topic. Each entry lists the radix -convention, the precision, and the error / saturation behaviour. All +convention, the precision, and the error / saturation behavior. All types are from FR_defs.h: s8 s16 s32 s64 for signed and u8 u16 u32 u64 for unsigned integers (these are aliases for the <stdint.h> types).

@@ -29,7 +29,7 @@

Reading this reference

radix handling and precision separately, because in a mixed-radix library those four things are what actually lets you plan an arithmetic pipeline without hidden -quantisation. If you are new to fixed-point, the +quantization. If you are new to fixed-point, the Fixed-Point Primer explains the notation first; come back here once you’re comfortable reading s15.16 and s0.15.

@@ -101,6 +101,8 @@

Sentinel return values (FR_defs.h)

FR_OVERFLOW_POS0x7FFFFFFF (INT32_MAX)Saturating ops when the true result exceeds +231. FR_OVERFLOW_NEG0x80000000 (INT32_MIN)Saturating ops when the true result is below −231. FR_DOMAIN_ERROR0x80000000 (INT32_MIN)Functions with an invalid input, e.g. FR_sqrt(-1), FR_log2(0), FR_asin(2.0). Shares the bit pattern of FR_OVERFLOW_NEG, so don’t mix a ≤ FR_OVERFLOW_NEG check with a domain check — test for the exact sentinel. +FR_TRIG_MAXVAL0x7FFFFFFF (INT32_MAX)Tangent saturation ceiling. Returned by fr_tan_bam, fr_tan, fr_tan_deg, and FR_TanI when the angle is near a pole (90° + k·180°). +FR_TRIG_MINVAL-FR_TRIG_MAXVALTangent saturation floor. Negative-side pole saturation. @@ -162,7 +164,7 @@

Integer ↔ fixed-point

FR_INT(x, r) x: fixed-point at radix r integer - Truncates toward zero. FR_INT(-1, 4) == 0. Useful when you want C’s normal integer-cast behaviour. + Truncates toward zero. FR_INT(-1, 4) == 0. Useful when you want C’s normal integer-cast behavior. FR_NUM(i, f, d, r) @@ -316,7 +318,7 @@

Utility macros

FR_DIV_TRUNC(x, xr, y, yr) x: numerator at radix xr; y: denominator at radix yr ((s64)(x) << (yr)) / (s32)(y) - Truncating division (rounds toward zero). This was the behaviour of FR_DIV in v2.0.0; use it when you need exact backward compatibility or when the truncation bias is acceptable. + Truncating division (rounds toward zero). This was the behavior of FR_DIV in v2.0.0; use it when you need exact backward compatibility or when the truncation bias is acceptable. FR_DIV32(x, xr, y, yr) @@ -335,7 +337,7 @@

Utility macros

Arithmetic

-

FR_Math splits arithmetic into three flavours. The +

FR_Math splits arithmetic into three flavors. The macros (FR_ADD, FR_SUB) are mixed-radix, inline, and wrap on overflow. The s.16 helper functions (FR_FixMuls, @@ -507,7 +509,7 @@

Why u16 for BAM (not s32)?

“But what if I want to pass in any signed angle without worrying about conversion?” That is exactly what -FR_CosI(deg), FR_Cos(deg, radix), and +FR_CosI(deg), fr_cos_deg(deg, radix), and fr_cos(rad, radix) are for. All three take signed inputs and reduce them to BAM for you. The only place you actually see a u16 is at the internal @@ -564,7 +566,7 @@

Worked example: keeping precision on chips without a multiplier

hand-written DSP inner loop — and the answer has at most ±0.5 LSB of truncation error. The same discipline applies to the other direction: in FR_DEG2BAM the divide-by-360 is -a compile-time constant, so any optimising compiler folds it into a +a compile-time constant, so any optimizing compiler folds it into a multiply-by-reciprocal (or, on a weaker toolchain, a runtime call that you can inline yourself).

@@ -614,6 +616,7 @@

BAM-native (the core)

fr_cos_bams32 fr_cos_bam(u16 bam)s15.16, range [−65536, +65536]. Exact at cardinal angles. fr_sin_bams32 fr_sin_bam(u16 bam)s15.16, range [−65536, +65536]. Exact at cardinal angles. Defined as fr_cos_bam(bam − FR_BAM_QUADRANT). +fr_tan_bams32 fr_tan_bam(u16 bam)s15.16. Uses a 65-entry octant table for [0, 45°] and the reciprocal identity tan(x) = 1/tan(90°−x) for (45°, 90°). Saturates to ±FR_TRIG_MAXVAL at the poles (90°, 270°). Returns exact 0 at 0° and 180°. No 64-bit intermediates; one 32-bit division only in the >45° path. @@ -628,16 +631,18 @@

Radian-native

-

Integer-degree wrappers (legacy API)

+

Degree wrappers

-

The uppercase legacy API takes an angle in degrees. +

The degree API takes an angle in degrees. FR_SinI, FR_CosI and FR_TanI take plain integer degrees — the trailing I denotes -integer. The variants without the I -suffix (FR_Sin, FR_Cos, FR_Tan) -accept a radix argument and treat the degree value as -fixed-point, so you can pass fractional degrees like -42.375°.

+integer. The primary degree wrappers with a +radix argument are fr_sin_deg, +fr_cos_deg, and fr_tan_deg — they +treat the degree value as fixed-point, so you can pass +fractional degrees like 42.375°. The uppercase names +FR_Sin, FR_Cos, and FR_Tan +are legacy aliases that map to the same functions.

@@ -645,24 +650,23 @@

Integer-degree wrappers (legacy API)

- - - + + + + + +
SymbolSignatureKind
FR_SinIFR_SinI(deg)s32 (s15.16)Macro: fr_sin_bam(FR_DEG2BAM(deg)). Zero-cost inline.
FR_CosIFR_CosI(deg)s32 (s15.16)Macro: fr_cos_bam(FR_DEG2BAM(deg)).
FR_TanIs32 FR_TanI(s16 deg)Function. Returns at radix 16; saturates to ±INT32_MAX near 90° / 270°.
FR_Sins32 FR_Sin(s16 deg, u16 radix)deg is fixed-point at radix. Returns s15.16.
FR_Coss32 FR_Cos(s16 deg, u16 radix)Same.
FR_Tans32 FR_Tan(s16 deg, u16 radix)Returns at radix 16; saturates to ±INT32_MAX near 90° / 270°.
fr_sin_degs32 fr_sin_deg(s32 deg, u16 radix)Function. deg is fixed-point at radix. Returns s15.16.
fr_cos_degs32 fr_cos_deg(s32 deg, u16 radix)Function. Same.
fr_tan_degs32 fr_tan_deg(s32 deg, u16 radix)Function. Returns at radix 16; saturates to ±INT32_MAX near 90° / 270°.
FR_SinFR_Sin(deg, radix)Legacy macro alias for fr_sin_deg.
FR_CosFR_Cos(deg, radix)Legacy macro alias for fr_cos_deg.
FR_TanFR_Tan(deg, radix)Legacy macro alias for fr_tan_deg.
-

Degree wrappers on the BAM path

+

Degree-to-BAM path

-

If you’re using the lowercase family and want to skip the -radix entirely, two convenience macros cover pure integer degrees:

- - - - - - - -
MacroExpansion
fr_cos_deg(deg)fr_cos_bam(FR_DEG2BAM(deg))
fr_sin_deg(deg)fr_sin_bam(FR_DEG2BAM(deg))
+

fr_cos_deg, fr_sin_deg, and +fr_tan_deg are now functions (not macros). They accept +a fixed-point degree value with a radix argument, +convert to BAM internally, and call the BAM core. For plain integer +degrees with no radix parameter, use FR_CosI / +FR_SinI / FR_TanI instead.

Inverse trigonometry

diff --git a/pages/guide/building.html b/pages/guide/building.html index 10b3739..5ab570f 100644 --- a/pages/guide/building.html +++ b/pages/guide/building.html @@ -107,42 +107,41 @@

tools/make_release.sh

The test suite

-

Tests live under tests/ and are split into six +

Tests live under tests/ and are split into seven binaries to keep compile times low:

- - - - + + - - + + + +
BinaryWhat it checks
test_basicRadix conversions, FR_ADD, FR_FixMuls, rounding.
test_trigInteger-degree trig (FR_Sin et al.).
test_trig_radiansRadian / BAM trig and the v2 fr_sin API.
test_log_expLog base 2 / ln / log10 and their inverses.
fr_testRadix conversions, FR_ADD, FR_FixMuls, rounding (legacy harness).
test_comprehensiveTrig (degree, radian, BAM), log/exp, sqrt, hypot.
test_2d2D transforms, determinants, inverses.
test_full_coverageDark-corner cases: overflow sentinels, edge radixes, round-trips.
test_tddCharacterisation tests pinned to bit-exact reference values.
test_overflowOverflow sentinels, saturation, edge radixes.
test_fullFull-coverage dark-corner cases and round-trips.
test_2d_completeExtended 2D: matrix composition, inverse, point transforms.
test_tddCharacterization tests pinned to bit-exact reference values.
-

As of v2.0.0 the suite contains 42 tests across -those binaries and covers 99% of the library source. +

The suite covers 99% of the library source. Every public symbol is exercised at least once.

Running a single binary

-
make build/test_basic
-./build/test_basic
+
make test-comprehensive
+./build/test_comprehensive
 
 # or all of them at once
 make test

Running the TDD pins after a change

-

test_tdd.cpp is a characterisation suite. It records +

test_tdd.cpp is a characterization suite. It records exact bit patterns for a sample of inputs and fails loudly if those -patterns drift. Any change that modifies the numerical behaviour of +patterns drift. Any change that modifies the numerical behavior of the library will break this suite — that’s the point.

-

If you intended to change the numerical behaviour (e.g. +

If you intended to change the numerical behavior (e.g. you improved a polynomial approximation), update the pinned values in tests/test_tdd.cpp and note the change in release_notes.md along with any updates to the @@ -175,52 +174,68 @@

Cross-compilation

Motorola 68km68k-linux-gnu-gccDocker. Motorola 68HC11m68hc11-gccDocker. PowerPCpowerpc-linux-gnu-gccDocker. +MIPS32mipsel-linux-gnu-gccDocker. Xtensa LX106 (ESP8266)xtensa-lx106-elf-gccDocker. +Xtensa LX7 (ESP32-S3)xtensa-esp-elf-gccDocker (Espressif toolchain). 8051sdccManual.

Code size (.text section, compiled with -Os)

-

Sizes are for FR_math.c compiled with -Os -ffreestanding. -Core = compiled with -DFR_CORE_ONLY (math only, no print, no waves). +

Sizes are for FR_math.c compiled with -Os. +Lean = -DFR_LEAN -DFR_NO_PRINT (radian trig, inv trig, log/exp, sqrt). +Core = -DFR_CORE_ONLY (+ degree trig, BAM tan, log10, hypot). +Full = all features (+ print, waves, ADSR). With -ffunction-sections and linker --gc-sections, only the functions your application references are linked, so real flash usage will be smaller.

- + - - - - - - - - - - + + + + + + + + + + + + + + + + +
TargetCoreFull
TargetLeanCoreFull
RP2040 (Cortex-M0+)2.6 KB4.2 KB
STM32 (Cortex-M4)2.6 KB4.2 KB
RISC-V 32 (rv32imac)3.0 KB4.7 KB
ESP32 (Xtensa)3.5 KB5.2 KB
68k3.5 KB5.3 KB
x86-64 (GCC)3.5 KB5.7 KB
x86-324.5 KB6.8 KB
MSP430 (16-bit)5.9 KB8.9 KB
68HC1110.8 KB16.0 KB
AVR (ATmega328P)7.0 KB10.6 KB
Xtensa LX7 (ESP32-S3)2.9 KB4.2 KB5.3 KB
Cortex-M4 (STM32)3.3 KB4.4 KB5.5 KB
Cortex-M0 (RP2040)3.4 KB4.5 KB5.7 KB
ARM Thumb3.4 KB4.7 KB5.9 KB
RISC-V rv644.0 KB5.5 KB6.8 KB
RISC-V rv324.1 KB5.5 KB6.8 KB
Xtensa LX106 (ESP8266)4.2 KB5.8 KB7.3 KB
ARM324.3 KB5.8 KB7.7 KB
68k4.4 KB6.2 KB7.8 KB
MIPS324.7 KB6.6 KB8.7 KB
x86-64 (GCC)4.6 KB6.1 KB8.0 KB
AArch64 (ARM64)4.8 KB6.6 KB8.7 KB
x86-325.3 KB7.2 KB9.2 KB
PowerPC5.8 KB8.0 KB10.4 KB
MSP430 (16-bit)7.8 KB10.7 KB12.8 KB
AVR (ATmega328P)9.2 KB12.8 KB15.4 KB
68HC1113.3 KB18.4 KB22.6 KB

Lean build options

-

Three compile-time #define guards let you strip optional subsystems +

Compile-time #define guards let you strip optional subsystems for ROM-constrained targets. Define them before including FR_math.h (or pass -D on the compiler command line):

- + +
DefineWhat it removesTypical savings
FR_CORE_ONLYEverything below (print + waves)~1.9 KB
FR_LEANDegree trig, BAM tan, angle converters, FR_log10, FR_hypot, waves + ADSR~3.7 KB
FR_CORE_ONLYPrint + waves (shorthand for both below)~1.9 KB
FR_NO_PRINTFR_printNumF, FR_printNumD, FR_printNumH, FR_numstr~1.3 KB
FR_NO_WAVESfr_wave_* (6 shapes), fr_adsr_* (ADSR envelope), FR_HZ2BAM_INC~0.6 KB
+

FR_LEAN keeps only radian trig (sin, cos, tan), inverse trig, sqrt, +log2, ln, exp, pow2, and arithmetic — comparable to libfixmath’s API at +4.7 KB text. FR_LEAN implies FR_NO_WAVES.

+

FR_CORE_ONLY is a convenience shorthand that defines both FR_NO_PRINT and FR_NO_WAVES in one step.

@@ -236,7 +251,7 @@

Lean build options

To regenerate this table, run the Docker cross-build (requires the xelp Docker image):

-
scripts/crossbuild-docker.sh
+
scripts/crossbuild_sizes.sh

Example: RISC-V

diff --git a/pages/guide/examples.html b/pages/guide/examples.html index 137525b..273fede 100644 --- a/pages/guide/examples.html +++ b/pages/guide/examples.html @@ -18,7 +18,7 @@

Examples

Short, runnable snippets for the most common FR_Math tasks. Each -example compiles cleanly against the v2.0.0 library with:

+example compiles cleanly against the library with:

cc -Isrc example.c src/FR_math.c -o example
 ./example
@@ -70,8 +70,9 @@

1. Basic radix conversion

2. Trig — integer degrees vs radian vs BAM

FR_Math supports three angle conventions and this example hits -all three: integer degrees through the legacy -FR_Sin / FR_Cos API, the radian-native +all three: integer degrees through +fr_sin_deg / fr_cos_deg (or the legacy +aliases FR_Sin / FR_Cos), the radian-native fr_sin / fr_cos (radian at a chosen input radix), and BAM-native fr_sin_bam / fr_cos_bam. All three paths feed the same 129-entry @@ -79,7 +80,7 @@

2. Trig — integer degrees vs radian vs BAM

identical results.

Caveats: the radix parameter on -FR_Sin(deg, radix) is the radix of the degree +fr_sin_deg(deg, radix) is the radix of the degree input, not the output. All sin/cos functions return s15.16 — that is, s32 at radix 16, where 1.0 = 65536 (FR_TRIG_ONE). The values compared @@ -208,19 +209,16 @@

4. Logarithm, exponential, decibels

5. Arctangent and atan2

The inverse-trig functions in FR_Math return angles in -degrees, not radians — the output fits in -an s16 and you can feed it straight back into -FR_SinI / FR_CosI without any -conversion. This example exercises both FR_atan -(single-argument ratio) and FR_atan2 (full-circle, -two-argument).

- -

Caveats: FR_atan2 takes only two -arguments (y, x) and has no radix -parameter — it returns degrees in [−180, 180] as -s16. The radix argument on -FR_atan is the radix of the input ratio, -not of the output.

+radians at a caller-chosen output radix. This +example exercises both FR_atan (single-argument +ratio) and FR_atan2 (full-circle, two-argument).

+ +

Caveats: all inverse-trig functions take an +out_radix parameter that sets the radix of the +output. FR_atan2(y, x, out_radix) returns +radians in [−π, π] as s32 at the chosen +radix. FR_atan(input, radix, out_radix) has +separate radixes for input and output.

#include <stdio.h>
 #include "FR_math.h"
@@ -229,18 +227,19 @@ 

5. Arctangent and atan2

{ const u16 r = 14; - /* atan(1) = 45 degrees */ - s16 a = FR_atan(I2FR(1, r), r); - printf("atan(1) = %d degrees (expect 45)\n", a); + /* atan(1) = pi/4 radians ≈ 0.7854 */ + s32 a = FR_atan(I2FR(1, r), r, r); + printf("atan(1) = %d (radix %d, expect ~%d)\n", + (int)a, r, (int)(12868)); /* pi/4 at r14 */ /* Full-circle atan2 */ - s16 q2 = FR_atan2(I2FR( 1, r), I2FR(-1, r)); /* 135 deg */ - s16 q3 = FR_atan2(I2FR(-1, r), I2FR(-1, r)); /* -135 deg */ - printf("atan2( 1,-1) = %d\n", q2); - printf("atan2(-1,-1) = %d\n", q3); + s32 q2 = FR_atan2(I2FR( 1, r), I2FR(-1, r), r); /* 3*pi/4 */ + s32 q3 = FR_atan2(I2FR(-1, r), I2FR(-1, r), r); /* -3*pi/4 */ + printf("atan2( 1,-1) = %d (expect ~%d)\n", (int)q2, (int)(38603)); + printf("atan2(-1,-1) = %d (expect ~%d)\n", (int)q3, (int)(-38603)); /* asin with out-of-domain input */ - s16 bad = FR_asin(I2FR(2, r), r); + s32 bad = FR_asin(I2FR(2, r), r, r); if (bad == FR_DOMAIN_ERROR) printf("asin(2) rejected, good.\n"); return 0; @@ -423,7 +422,7 @@

10. Integer-only 2D transform for scanline renderers

coordinates in and writes s16 out. It’s a tiny bit lossier than the s32 form, but it sidesteps all the fixed-point conversion on the hot path — useful inside -the inner loop of a scanline rasteriser where you already know +the inner loop of a scanline rasterizer where you already know your coordinates fit in 16 bits.

Caveats: the output is narrowed to s16, @@ -510,7 +509,7 @@

11. String round-trip and radix precision

FR_printNumF(buf_putc, val, 16, 0, 8); printf(" 16 16 0x%08x %s\n", (unsigned)val, buf); /* Expected: "3.14158630" — good through 5 digits, then - * quantisation noise appears. This is the sweet spot for + * quantization noise appears. This is the sweet spot for * most embedded work: 16 bits of fraction fits in an s32 * with 15 bits of integer range (±32767). */ } @@ -558,10 +557,61 @@

11. String round-trip and radix precision

so the decimal rendering can only faithfully reproduce about two fractional digits. At radix 24 the value is 0x03243F6A — 26 significant bits — and seven decimal digits survive. The -eighth digit (5 vs 4) shows the quantisation floor: +eighth digit (5 vs 4) shows the quantization floor: 2^−24 ≈ 6 × 10^−8, so the last digit is always uncertain.

+

Desktop example programs

+ +

In addition to the inline snippets above, the examples/ directory +contains four self-contained desktop programs. Each has its own +Makefile and README.md; build artifacts stay within the +example’s directory.

+ + + + + + + + + + + + + + + + + + + + + +
DirectoryWhat it does
examples/fixed-point-basics/Educational walkthrough of radix interpretation, I2FR/FR2I + round-trips, FR_NUM constant construction, aligned add/sub, + multiply precision, division, saturation, and FR_printNumF + formatted output.
examples/log-exp-curves/Sweeps FR_log2, FR_ln, FR_log10, + FR_pow2, FR_EXP, FR_POW10, and + FR_sqrt against IEEE double reference values, printing + per-point and summary error tables.
examples/waveform-synth/Generates square, triangle, sawtooth, PWM, sine, and noise waveforms plus + an ADSR envelope and amplitude-modulated combination. Default mode renders + ASCII art; --csv mode outputs machine-readable CSV.
examples/trig-accuracy/Head-to-head comparison of FR_Math + (FR_SinI/FR_CosI/FR_TanI) vs + libfixmath (fix16_sin/fix16_cos/fix16_tan) + vs IEEE double over 0–360 degrees. Requires libfixmath source.
+ +

Build all from the repo root:

+ +
make examples        # builds all desktop examples
+make run-examples    # builds and runs 1-3, plus 4 if libfixmath present
+ +

Or build any single example from its directory:

+ +
cd examples/waveform-synth
+make run             # ASCII art output
+make run-csv         # CSV output
+

See also

    diff --git a/pages/guide/fixed-point-primer.html b/pages/guide/fixed-point-primer.html index a4e71c4..73325ef 100644 --- a/pages/guide/fixed-point-primer.html +++ b/pages/guide/fixed-point-primer.html @@ -294,7 +294,7 @@

    Notation: sM.N and the radix

    radix”, think of the radix as a type annotation that lives in your source code, not a runtime field.

    -

    Quantisation and loss of precision

    +

    Quantization and loss of precision

    Fixing the radix also fixes the smallest representable fractional step. At radix N, that step is @@ -302,7 +302,7 @@

    Quantisation and loss of precision

    the round-trip into the integer. Any real value smaller than the step rounds to zero; any real value landing between two adjacent steps rounds to one of them. The difference between the ideal -value and its stored form is called quantisation +value and its stored form is called quantization error, and it is the main price paid for doing fractional math in integer registers.

    @@ -323,7 +323,7 @@

    Quantisation and loss of precision

    error = 0.00000153 (< 0.002 %)
-

This behaviour isn’t a bug — it is the same +

This behavior isn’t a bug — it is the same compromise IEEE-754 floating point makes with its mantissa. The difference is that a float hides the trade-off behind a variable exponent, while fixed-point puts it on a ledger that the @@ -336,7 +336,7 @@

Quantisation and loss of precision

vanish; any finer and integer headroom is being spent for no benefit.

-

A second consequence worth recording: quantisation error +

A second consequence worth recording: quantization error accumulates. Summing a million low-radix values sums the errors too. Signal-processing pipelines with long feedback paths are the main reason to carry accumulators at a wider radix @@ -406,7 +406,7 @@

Displaying a fixed-point value

usable on targets without stdio — a UART write, an LCD glyph pusher, a ring-buffer append. The pad parameter sets a minimum field width and prec sets -the number of fractional digits. Rounding behaviour matches the +the number of fractional digits. Rounding behavior matches the hand-rolled version: excess fractional digits are truncated, and negative values are handled without the two’s-complement trap described above.

@@ -415,7 +415,7 @@

Arithmetic: what the operations actually do

Once you’ve chosen a radix, the everyday operations behave almost like integer math — with one or two twists per -operation that you just have to internalise. Let’s walk +operation that you just have to internalize. Let’s walk through them.

Addition and subtraction

@@ -489,7 +489,7 @@

Multiplication

doesn’t fire. Rounds to nearest — adds 0.5 LSB before the shift.
  • FR_FixMulSat(a, b, r) — same shape with - the same round-to-nearest behaviour, but also saturates to + the same round-to-nearest behavior, but also saturates to FR_OVERFLOW_POS / FR_OVERFLOW_NEG if the result wouldn’t fit. Prefer this one by default unless you’ve proven @@ -555,7 +555,7 @@

    Division

    division truncates toward zero for both signs, so −7 / 2 == −3 (not −4). Fixed-point division inherits - that behaviour. Round-to-nearest can be layered on top by + that behavior. Round-to-nearest can be layered on top by adding b / 2 (for a positive numerator) or −b / 2 (for a negative numerator) to the pre-scaled numerator before the divide.
  • @@ -581,7 +581,7 @@

    Changing radix

  • Going to a smaller radix — the low bits are dropped. Precision is lost; headroom grows. This is a good place to add ± (1 << (from_r - to_r - 1)) - before the shift if you want round-to-nearest behaviour.
  • + before the shift if you want round-to-nearest behavior.

    The value is conserved as closely as the destination radix can @@ -644,7 +644,7 @@

    Overflow, saturation, and the sentinels

    about it, you will eventually pass a pair of inputs whose product doesn’t fit, and plain C will hand you wrap-around garbage with no warning. A signed 32-bit multiply that overflows is not a -runtime error in C — it’s undefined behaviour that +runtime error in C — it’s undefined behavior that happens to look like data most of the time.

    FR_Math defends against this in three layers, and it’s @@ -743,12 +743,12 @@

    Choosing a radix

    A worked example: one-pole IIR low-pass filter

    The sections up to this point have introduced the pieces -individually: scaling, notation, quantisation, arithmetic, +individually: scaling, notation, quantization, arithmetic, overflow, and radix choice. A small end-to-end example is the fastest way to see how those pieces fit together on a real pipeline. The filter walked through below is a single-pole infinite-impulse-response (IIR) low-pass — about the -simplest entry in the DSP catalogue, but realistic enough to +simplest entry in the DSP catalog, but realistic enough to exercise nearly every decision the primer has covered so far.

    In floating point, the filter is one line of arithmetic:

    @@ -790,7 +790,7 @@

    Step 1: inventory the ranges

    ±32767 output range. But because it accumulates small updates on every sample, it will drift and lose precision unless carried at a higher radix than the raw - input. This is the quantisation-error accumulation noted + input. This is the quantization-error accumulation noted earlier in the primer, showing up in practice. @@ -889,7 +889,7 @@

    Step 5: test against the reference

    exercise the relevant paths — and reports the worst-case delta. For a radix-15 one-pole IIR the expected worst-case difference is on the order of a few LSB, comparable to the -inherent quantisation of the 16-bit output format and not +inherent quantization of the 16-bit output format and not audible in normal listening. Anything substantially larger indicates a radix choice that is too tight, a rounding mode that is drifting, or a missing int64 promotion on the @@ -911,8 +911,8 @@

    FR_Math’s naming conventions

    PrefixWhat it isExample FR_XXX()UPPERCASE macro — inline, zero call overhead.FR_ADD, FR_ABS, FR2I -FR_Xxx()Mixed-case C function — the classic v1 API. Integer-degree trig and related.FR_Sin, FR_log2, FR_sqrt -fr_xxx()Lowercase C function — v2 additions (radian / BAM trig, wave generators, ADSR).fr_sin, fr_wave_tri, fr_adsr_step +FR_Xxx()Mixed-case C function or legacy alias. FR_Sin/FR_Cos/FR_Tan are legacy aliases for fr_sin_deg/fr_cos_deg/fr_tan_deg.FR_log2, FR_sqrt, FR_Sin (legacy) +fr_xxx()Lowercase C function — the current API for degree wrappers, radian / BAM trig, wave generators, ADSR.fr_sin_deg, fr_cos_deg, fr_sin, fr_wave_tri, fr_adsr_step s8, s16, s32Signed integer typedefs (aliases for int8_t, int16_t, int32_t).— u8, u16, u32Unsigned integer typedefs.— @@ -973,14 +973,14 @@

    Angle representations

    u16 wraparound is the angular modulus — that’s the whole feature. Adding two u16 BAM values automatically gives you the right answer modulo a full -revolution, with zero quantisation error at the boundary and no +revolution, with zero quantization error at the boundary and no % 65536 in sight. If BAM were s32, every read of the table would have to explicitly mask off the top bits (and handle negative values) before the quadrant extraction (bam >> 14) made any sense. You would have traded one free operation for two slow ones on every sample, just to get -back the same behaviour. So instead, the public trig entry points -(FR_CosI, FR_Cos, fr_cos, and +back the same behavior. So instead, the public trig entry points +(FR_CosI, fr_cos_deg, fr_cos, and friends) all take signed angles — in degrees, fixed-radix degrees, or radians — and only the internal fr_cos_bam / fr_sin_bam primitives see diff --git a/pages/guide/getting-started.html b/pages/guide/getting-started.html index b6a22ed..e5076bf 100644 --- a/pages/guide/getting-started.html +++ b/pages/guide/getting-started.html @@ -31,8 +31,8 @@

    Install

    • Copy src/FR_math.c, src/FR_math.h, src/FR_defs.h (and optionally - src/FR_math_2D.cpp, src/FR_math_2D.h, - and src/FR_trig_table.h) into the target project, or
    • + src/FR_math_2D.cpp, src/FR_math_2D.h) + into the target project, or
    • Add FR_Math as a git submodule and point the build system at src/.
    @@ -46,7 +46,7 @@

    Install

    build.sh wipes build/, rebuilds the library, examples, and tests, and runs the full test suite. On success -the output shows 42 tests passing across six test binaries.

    +the output shows all tests passing (99% line coverage).

    A first program

    @@ -286,8 +286,10 @@

    Running the test suite

    make test           # build + run every test suite
     make coverage       # coverage report (requires gcov)
    -

    As of v2.0.1, FR_Math ships with 42 passing tests and 99% line -coverage across the library sources.

    +

    Run make test for a full pass. With make coverage, +line coverage of the library sources is about 99%. +See Building & Testing for targets, +cross-compilation, and CI.

    Next steps

    @@ -297,7 +299,7 @@

    Next steps

    conventions work.
  • API Reference — per-symbol inputs, outputs, precision, and saturation - behaviour.
  • + behavior.
  • Examples — runnable snippets for common tasks.
  • Building & Testing diff --git a/pages/index.html b/pages/index.html index ac84759..b449193 100644 --- a/pages/index.html +++ b/pages/index.html @@ -34,7 +34,7 @@

    FR_Math

    Tested on gcc, clang, MSVC, IAR, Keil, sdcc, AVR-gcc, MSP430-gcc, RISC-V toolchains, and Arduino.
  • Zero dependencies beyond <stdint.h>.
  • -
  • Parameterised radix: every function takes the binary point as an +
  • Parameterized radix: every function takes the binary point as an argument, so you choose how many fractional bits you need per call.
  • Deterministic, bounded error — every public symbol has a @@ -46,31 +46,35 @@

    Measured accuracy

    Errors below are measured at Q16.16 (s15.16). All functions accept any radix — Q16.16 is just the reference point for the table. -See the TDD -report for sweeps at radixes 8, 12, 16, and 24. -Percent errors skip expected values near zero (|expected| < 0.01).

    +Run make test-tdd to generate the TDD report +(build/test_tdd_report.md) with sweeps at radixes 8, 12, 16, and 24.

    - + - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + +
    FunctionMax err (%)Avg err (%)Note
    FunctionMax err (%)*Avg err (%)Note
    sin / cos0.71690.010065536-pt sweep + specials
    tan0.71180.016265536-pt sweep (skip poles)
    asin / acos0.70250.010565536-pt; sqrt approx near boundary
    atan20.49530.026865536x5 radii; asin/acos+hypot_fast8
    atan0.29850.015920001-pt sweep [-10,10]; via FR_atan2
    sqrt0.00030.0000Round-to-nearest
    log20.24790.004565-entry mantissa table
    pow20.13730.005765-entry fraction table
    ln, log100.00150.0004Via FR_MULK28 from log2
    exp0.07190.0051FR_MULK28 + FR_pow2
    exp_fast0.07190.0064Shift-only scaling
    pow100.11630.0075FR_MULK28 + FR_pow2
    pow10_fast0.11630.0100Shift-only scaling
    hypot (exact)0.00010.000064-bit intermediate
    hypot_fast8 (8-seg)0.09770.0508Shift-only, no multiply
    sin/cos (BAM)0.15260.0030very fast binary angle trig
    sin/cos (deg)0.15260.0029degree input trig fns
    sin/cos (rad)0.18280.0033radian (traditional) trig
    tan (BAM)0.58230.0008binary angle tangent; ±maxint at poles
    tan (deg)0.53110.0008degree input tangent; saturated at poles
    tan (rad)0.03860.0001radian (traditional) tangent
    asin / acos0.77710.0280reverse trig, radian output
    atan20.25640.0237reverse tangent, always safe
    atan0.24250.0155reverse tangent, accepts up to maxint
    sqrt0.00000.0000Round-to-nearest
    log20.01160.0016shift/add only for speed
    pow20.00180.0004shift/add only for speed
    ln, log100.00040.0000shift/add only for speed
    exp0.00030.0000shift/add only for speed
    exp_fast0.00090.0001Shift-only scaling
    pow100.00050.0000shift/add only for speed
    pow10_fast0.00220.0002Shift-only scaling
    hypot (exact)0.00000.0000Uses 64-bit intermediate
    hypot_fast8 (8-seg)0.09150.0320Shift-only, no multiply
    +

    *Relative error; reference clamped to 1% of full-scale output.

    What’s in the box

    @@ -80,8 +84,8 @@

    What’s in the box

    ArithmeticFR_ADD, FR_SUB, FR_DIV, FR_DIV32, FR_MOD, FR_FixMuls, FR_FixMulSat, FR_CHRDX UtilityFR_MIN, FR_MAX, FR_CLAMP, FR_ABS, FR_SGN -Trig (integer deg)FR_Sin, FR_Cos, FR_Tan, FR_SinI, FR_CosI, FR_TanI -Trig (radian/BAM)fr_sin, fr_cos, fr_tan, fr_sin_bam, fr_cos_bam, fr_sin_deg, fr_cos_deg +Trig (integer deg)fr_sin_deg, fr_cos_deg, fr_tan_deg, FR_SinI, FR_CosI, FR_TanI +Trig (radian/BAM)fr_sin, fr_cos, fr_tan, fr_sin_bam, fr_cos_bam, fr_tan_bam Inverse trigFR_atan, FR_atan2, FR_asin, FR_acos Log / expFR_log2, FR_ln, FR_log10, FR_pow2, FR_EXP, FR_POW10, FR_EXP_FAST, FR_POW10_FAST, FR_MULK28 RootsFR_sqrt, FR_hypot, FR_hypot_fast8 @@ -98,21 +102,24 @@

    What’s in the box

    Lean build options

    -

    Two compile-time #define guards let you strip optional subsystems +

    Compile-time #define guards let you strip optional subsystems for ROM-constrained targets. Define them before including FR_math.h (or pass -D on the compiler command line):

    +
    DefineWhat it removesTypical savings
    FR_LEANDegree trig, BAM tan, angle converters, FR_log10, FR_hypot, waves + ADSR~3.7 KB
    FR_NO_PRINTFR_printNumF, FR_printNumD, FR_printNumH, FR_numstr~1.3 KB
    FR_NO_WAVESfr_wave_* (6 shapes), fr_adsr_* (ADSR envelope), FR_HZ2BAM_INC~0.6 KB
    -

    With both guards enabled the core math library (trig, inverse trig, log/exp, -sqrt, hypot) compiles to ~3.5 KB on x86-64 / clang -Os. On Thumb-2 this -would be roughly 2.6 KB.

    +

    FR_LEAN keeps only radian trig (sin, cos, tan), inverse trig, +sqrt, log2, ln, exp, pow2, and arithmetic — comparable to libfixmath’s +API but at 4.7 KB text vs libfixmath’s 4.9 KB + 112 KB BSS. +With FR_LEAN + FR_NO_PRINT the library compiles to +~4.7 KB on x86-64 / clang -Os.

    /* Example: headless sensor node — math only, no print, no audio */
     #define FR_NO_PRINT
    @@ -124,18 +131,18 @@ 

    Lean build options

    are most useful when you include the library as a single .c file or static archive without section-level dead-code elimination.

    -

    Why fixed-point, in 2026?

    +

    Why fixed-point?

    -

    Most application code today has an FPU and can use float -freely. But there are still large, interesting corners where -fixed-point pays off:

    +

    Many modern microcontrollers have an FPU and can use float +freely. Older and low-cost MCUs remain common. Fixed-point is often faster and +more deterministic than float, and it excels in situations like:

      -
    • 8- and 16-bit MCUs (AVR, MSP430, 8051, sdcc) where the +
    • 8- and 16-bit MCUs (AVR, MSP430, 8051, SDCC) where the FPU does not exist and even software float is too slow or too large.
    • Hot inner loops on any CPU where a - parameterised-radix integer multiply is faster and more + parameterized-radix integer multiply is faster and more deterministic than a float. Think DSP taps, PID loops, coordinate transforms inside a scanline renderer.
    • Bit-exact reproducibility across compilers, @@ -179,20 +186,25 @@

      Quick taste

      * conversions and simple arithmetic: * I2FR, FR2I, FR_NUM, FR_ADD, FR_DIV, FR_ABS, FR_CHRDX, FR_EXP ... * - * MixedCase FR_ names are functions — they contain loops, tables, or - * multi-step algorithms where inlining would waste ROM: - * FR_Cos, FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ... + * MixedCase FR_ names are legacy functions — they still work but + * map to the current lowercase names: + * FR_Cos → fr_cos_deg, FR_Sin → fr_sin_deg, FR_Tan → fr_tan_deg * - * lowercase fr_ names are v2 functions (radian trig, wave generators, - * ADSR envelopes): - * fr_sin, fr_cos, fr_tan, fr_wave_tri, fr_adsr_step ... + * lowercase fr_ names are the current API (degree wrappers, radian + * trig, BAM trig, wave generators, ADSR envelopes): + * fr_cos_deg, fr_sin_deg, fr_tan_deg, fr_sin, fr_cos, fr_tan, + * fr_wave_tri, fr_adsr_step ... + * + * Other MixedCase / lowercase FR_ names are functions with loops, + * tables, or multi-step algorithms: + * FR_sqrt, FR_atan2, FR_log2, FR_pow2, FR_printNumF ... * * Some macros wrap functions: FR_EXP(x,r) scales x then calls * FR_pow2 — one-liner convenience, heavy lifting in the function. */ /* ---- Math functions ---- */ -s32 c45 = FR_Cos(45, 0); /* cos(45°) = 0.7071 */ +s32 c45 = fr_cos_deg(45, 0); /* cos(45°) = 0.7071 */ s32 s30 = fr_sin(FR_numstr("0.5236", R), R); /* sin(0.5236 rad) */ s32 root2 = FR_sqrt(two, R); /* sqrt(2) = 1.4142 */ s32 angle = FR_atan2(I2FR(1,R), I2FR(1,R), R); /* atan2(1,1) rad */ @@ -228,7 +240,7 @@

      Comparison

      Multiply-free optionNoNoYes (e.g. FR_EXP_FAST, FR_hypot_fast8) Wave generatorsNoNo6 shapes + ADSR DependenciesNoneARM onlyNone -Code size (Cortex-M0, -Os)2.4 KB~40 KB+4.2 KB +Code size (Cortex-M0, -Os)2.4 KB~40 KB+3.4 KB lean / 5.7 KB full @@ -237,7 +249,7 @@

      Comparison

      FR_Math includes log/ln/log10, wave generators, ADSR, print helpers, and variable radix. CMSIS-DSP estimate is for the math function subset only. See -docker/build_sizes.sh +scripts/crossbuild_sizes.sh for the build script.

      History

      @@ -246,7 +258,7 @@

      History

      built for graphics transforms on 16 MHz 68k Palm Pilots (it shipped inside Trumpetsoft’s Inkstorm), then ported forward to ARM, x86, MIPS, RISC-V, and various 8/16-bit embedded -targets. v2.0.7 is the current release with a full test suite, +targets. The current release has a full test suite, bit-exact numerical specification, and CI on every push.

      License

      diff --git a/pages/releases.html b/pages/releases.html index 337035a..96e18bb 100644 --- a/pages/releases.html +++ b/pages/releases.html @@ -5,7 +5,7 @@ Releases — FR_Math - + @@ -21,6 +21,19 @@

      Releases

      release_notes.md in the repo.

      +

      v2.0.8 — 2026

      + +

      Tangent accuracy rewrite and trig rounding fix.

      + +
        +
      • BAM-native tangent: new fr_tan_bam(u16 bam) with 65-entry octant table (130 bytes). No 64-bit math. FR_TanI, FR_Tan, fr_tan are now thin wrappers.
      • +
      • Round-to-nearest fix: radian/degree trig wrappers now round instead of truncating when converting to BAM. Peak error drops from ~1.03% to 0.16% on the radian path, matching BAM-native accuracy.
      • +
      • Conversion macro trimming: FR_DEG2BAM and FR_RAD2BAM reduced to ~18–21 bits (from ~28 bits). Verified: no measurable accuracy impact.
      • +
      • FR_TRIG_MINVAL fixed: now -FR_TRIG_MAXVAL (was -FR_TRIG_MASK)
      • +
      + +
      +

      v2.0.7 — 2026

      README restructure, accuracy table cleanup, expanded cross-compile support.

      @@ -28,10 +41,10 @@

      v2.0.7 — 2026

      • FR_CORE_ONLY convenience define — single #define strips both print helpers and wave generators
      • Accuracy table cleanup — removed LSB column (percent error is the user-facing metric)
      • -
      • New cross-compile targets — RP2040 (Cortex-M0+), STM32 (Cortex-M4), 68HC11 added to Docker build
      • -
      • Two-column size table — Core (-DFR_CORE_ONLY) vs Full for every target
      • -
      • scripts/update_sizes.sh — auto-patches size tables from build/sizes.csv
      • -
      • README reordered: accuracy table first, then function list, then size table
      • +
      • New cross-compile targets — RP2040 (Cortex-M0+), STM32 (Cortex-M4), 68HC11, MIPS32 added to Docker build
      • +
      • Three-column size table — Lean / Core / Full for every target, sorted 8-bit → 64-bit
      • +
      • scripts/crossbuild_sizes.sh — consolidated script: Docker build, CSV + markdown output, doc patching
      • +
      • README reordered and cleaned up: accuracy table first, badges as standard markdown, concise build flavor descriptions

      @@ -159,7 +172,7 @@

      New utility macros

    • FR_DIV(x, xr, y, yr) — fixed-point division with 64-bit pre-scaling. Now rounds to nearest (≤ 0.5 LSB error) instead of truncating. - FR_DIV_TRUNC preserves the old truncating behaviour + FR_DIV_TRUNC preserves the old truncating behavior for backward compatibility. FR_DIV32 is the 32-bit-only truncating path.
    • FR_MOD(x, xr, y, yr) — fixed-point modulus.
    • @@ -190,7 +203,7 @@

      Breaking changes from v2.0.0

      FR_atan signature(input, radix) → s16 degrees(input, radix, out_radix) → s32 radians FR_atan2 signature(y, x) → s16 degrees(y, x, out_radix) → s32 radians FR_BAM2RADoff by 1024× (bug)correct -FR_DIV roundingtruncates toward zerorounds to nearest (use FR_DIV_TRUNC for old behaviour) +FR_DIV roundingtruncates toward zerorounds to nearest (use FR_DIV_TRUNC for old behavior) @@ -231,7 +244,7 @@

      Numerical fixes

      dropped.
    • FR_atan, FR_Tan, FR_TanI: wiring and overflow fixes.
    • -
    • FR_printNumD/F/H: fixed undefined behaviour on +
    • FR_printNumD/F/H: fixed undefined behavior on INT_MIN and a broken fraction extraction in the v1 code.
    • FR_DEG2RAD / FR_RAD2DEG: macro bodies @@ -251,7 +264,7 @@

      New functionality

      FR_BAM2DEG, FR_RAD2BAM, FR_BAM2RAD. BAM (16 bits per full circle) is the natural integer representation for phase accumulators and - gives zero quantisation at the wraparound.
    • + gives zero quantization at the wraparound.
    • Square root and hypot: FR_sqrt uses a digit-by-digit integer isqrt on int64_t; FR_hypot computes sqrt(x² + y²) @@ -312,10 +325,9 @@

      Breaking changes

      Test suite

      -

      v2 ships with 42 tests across six test binaries -and a characterisation suite (test_tdd.cpp) that pins -numerical behaviour to bit-exact reference values. Overall line -coverage is 99% on the library sources.

      +

      v2 ships with a full test suite covering 99% of library +source lines, plus a characterization suite (test_tdd.cpp) +that pins numerical behavior to bit-exact reference values.

      v1.0.3 — 2025

      @@ -351,7 +363,7 @@

      Timeline

      when it was written to run 2D graphics transforms on 16 MHz 68k Palm Pilots for Trumpetsoft’s Inkstorm. It has since been ported to ARM, x86, MIPS, RISC-V, and a menagerie of 8- and -16-bit embedded targets. v2.0.7 is the current release with a +16-bit embedded targets. The current release has a full test suite, a bit-exact numerical specification, and CI on every push.

      diff --git a/pages/version.json b/pages/version.json new file mode 100644 index 0000000..f81a375 --- /dev/null +++ b/pages/version.json @@ -0,0 +1 @@ +{"version":"2.0.8","hex":"0x020008"} diff --git a/release_management.md b/release_management.md index 3353e3a..214f544 100644 --- a/release_management.md +++ b/release_management.md @@ -20,7 +20,7 @@ All version-bearing files are kept in sync via | `./scripts/build.sh` | Clean rebuild + run tests (one-shot) | | `./scripts/clean_build.sh` | Wipe `build/` and `coverage/`, recreate them | | `./scripts/coverage_report.sh` (or `make coverage`) | gcov coverage table | -| `./scripts/size_report.sh` (or `make size-report`) | Multi-arch object-size report | +| `./scripts/crossbuild_sizes.sh` (or `make size-report`) | Multi-arch object-size report (Docker) | | `./scripts/sync_version.sh` | Propagate `FR_MATH_VERSION_HEX` to every versioned file | | `./scripts/sync_version.sh --check` | Drift check (non-destructive) | | `./tools/make_release.sh` | Guided release pipeline (validate → PR → merge → tag → publish) | @@ -77,7 +77,7 @@ start, so do not run it inside a session that depends on pre-existing Invoked automatically by `make coverage` and by `tools/make_release.sh`. -### `scripts/size_report.sh` — multi-architecture size report +### `scripts/crossbuild_sizes.sh` — multi-architecture size report Compiles `src/FR_math.c` against every cross-toolchain it can find and prints a formatted table of object sizes. Architectures attempted: @@ -242,7 +242,7 @@ invoked individually. | Target | Effect | | --- | --- | -| `make size-report` | Delegates to `scripts/size_report.sh` (multi-arch table) | +| `make size-report` | Delegates to `scripts/crossbuild_sizes.sh` (multi-arch table, Docker) | | `make size-simple` | `size` (or `ls -lh`) on `build/*.o` for the current platform only | ### Clean @@ -290,7 +290,7 @@ loop is: ```bash ./scripts/build.sh # clean rebuild + tests ./scripts/coverage_report.sh # coverage after a change -./scripts/size_report.sh # size after a change +./scripts/crossbuild_sizes.sh # size after a change ``` --- diff --git a/release_notes.md b/release_notes.md index 8a5f3bb..5ff9659 100644 --- a/release_notes.md +++ b/release_notes.md @@ -1,5 +1,51 @@ # FR_Math Release Notes +## Version 2.0.8 (2026) + +Tangent accuracy rewrite and trig rounding fix. + +### BAM-native tangent table + +- **New `fr_tan_bam(u16 bam)`** function with a dedicated 65-entry octant + lookup table (`gFR_TAN_TAB_O` in `FR_trig_table.h`, 130 bytes ROM). + First octant uses direct table + lerp; second octant uses the + reciprocal identity `tan(x) = 1/tan(90-x)` with one 32-bit division. + No 64-bit intermediates anywhere in the tan path. +- **`FR_TanI`, `FR_Tan`, `fr_tan`** are now thin wrappers over + `fr_tan_bam`. The old sin/cos division implementation is removed. +- Peak error: 0.17% (BAM), 0.60% (deg r7), 0.17% (rad r16). + +### Round-to-nearest fix for radian/degree wrappers + +- `fr_cos`, `fr_sin`, `fr_tan`, `FR_Cos`, `FR_Sin`, `FR_Tan` now add + 0.5 LSB (`1 << (radix-1)`) before the `>> radix` shift when converting + from radians/degrees to BAM. This rounds to the nearest BAM value + instead of truncating, eliminating a systematic 1-BAM rounding error + that caused ~1% peak error near zero crossings. +- Radian-path sin/cos/tan now match BAM-native accuracy (0.16-0.17% + peak, was ~1.03%). + +### Conversion macro trimming + +- `FR_DEG2BAM`: 10 terms (~28 bits) reduced to 7 terms (~18 bits) +- `FR_RAD2BAM`: 9 terms (~27 bits) reduced to 7 terms (~21 bits) +- `FR_DEG2RAD`: 3 terms (~13 bits) extended to 5 terms (~17 bits) +- 18 bits of precision gives 4 bits of headroom over the 14-bit + effective BAM resolution of the trig tables. Verified: reverting to + the old full-precision macros changes sin/cos peak error by <0.04%. + +### Other + +- `FR_TRIG_MINVAL` fixed: was `-FR_TRIG_MASK` (-65535), now + `-FR_TRIG_MAXVAL` (-2147483647) to properly pair with `FR_TRIG_MAXVAL` + for tan saturation clamping. +- Accuracy table in all docs now shows separate BAM/deg/rad rows for + sin/cos and tan, matching the TDD characterization report. +- `fr_tan_bam` added to function listings across README, docs, HTML + pages, and llms.txt. + +--- + ## Version 2.0.7 (2026) README restructure, accuracy table cleanup, and expanded cross-compile support. @@ -20,16 +66,18 @@ that varies with the chosen radix. - **RP2040 (Cortex-M0+)** and **STM32 (Cortex-M4)** added as named targets in the Docker cross-build -- **68HC11** toolchain added to the Docker image -- Size table now shows two columns: **Core** (`-DFR_CORE_ONLY`) and **Full** -- `docker/build_sizes.sh` outputs `build/sizes.csv` for automated patching -- New `scripts/update_sizes.sh` auto-patches size tables into README, docs, - and HTML pages +- **68HC11** and **MIPS32** toolchains added to the Docker image +- Size table now shows three columns: **Lean**, **Core**, and **Full** +- Consolidated `scripts/crossbuild_sizes.sh` — single script runs Docker, + builds all targets, writes CSV + markdown, and patches doc files + (replaces `crossbuild-docker.sh`, `size_report.sh`, `update_sizes.sh`) +- Size table sorted by architecture width (8-bit → 64-bit) ### README restructure Sections reordered: accuracy table moved above the size table to lead with -the library's primary selling point. Size table now shows Core vs Full columns. +the library's primary selling point. Badges cleaned up from Quikdown HTML to +standard markdown syntax. Build flavor descriptions made more concise. --- diff --git a/scripts/accuracy_report.sh b/scripts/accuracy_report.sh index 1bd5745..f996ac1 100755 --- a/scripts/accuracy_report.sh +++ b/scripts/accuracy_report.sh @@ -86,12 +86,13 @@ patch_markdown() { return fi - # Build replacement block: sentinel + header + separator + data + sentinel + # Build replacement block: sentinel + header + separator + data + footnote + sentinel local replacement replacement=""$'\n' - replacement+="| Function | Max err (%) | Avg err (%) | Note |"$'\n' + replacement+="| Function | Max err (%)*| Avg err (%) | Note |"$'\n' replacement+="|---|---:|---:|---|"$'\n' replacement+="$DATA_ROWS"$'\n' + replacement+=$'\n'"*Relative error; reference clamped to 1% of full-scale output."$'\n' replacement+="" # Use perl to replace between sentinels @@ -137,11 +138,12 @@ patch_html() { local replacement replacement=""$'\n' replacement+=""$'\n' - replacement+=""$'\n' + replacement+=""$'\n' replacement+=""$'\n' replacement+="$html_rows"$'\n' replacement+=""$'\n' replacement+="
      FunctionMax err (%)Avg err (%)Note
      FunctionMax err (%)*Avg err (%)Note
      "$'\n' + replacement+="

      *Relative error; reference clamped to 1% of full-scale output.

      "$'\n' replacement+="" perl -0777 -i -pe " diff --git a/scripts/build.sh b/scripts/build.sh index ade09a4..129c18a 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -75,7 +75,7 @@ echo -e "${GREEN} ok${NC}" # Print host-compiled library sizes so the developer can see how the # objects came out without having to dig in build/. This is host-only; -# for a multi-arch comparison run scripts/size_report.sh. +# for a multi-arch comparison run scripts/crossbuild_sizes.sh. print_host_size() { local host_arch host_arch="$(uname -m 2>/dev/null || echo unknown)" @@ -129,6 +129,6 @@ echo -e "${GREEN}=========================================${NC}" echo "" echo "Next steps:" echo " - ./scripts/coverage_report.sh (coverage analysis)" -echo " - ./scripts/size_report.sh (object file sizes)" +echo " - ./scripts/crossbuild_sizes.sh (object file sizes)" echo " - ./tools/make_release.sh (guided release pipeline)" echo "" diff --git a/scripts/crossbuild-docker.sh b/scripts/crossbuild-docker.sh deleted file mode 100755 index 7f10d6d..0000000 --- a/scripts/crossbuild-docker.sh +++ /dev/null @@ -1,123 +0,0 @@ -#!/bin/bash -# crossbuild-docker.sh -- cross-compile FR_math inside Docker container -# Runs inside the xelp-crossbuild Docker image. -# Reports object file and .text section sizes for each target. - -set -e - -SRC=/fr_math/src/FR_math.c -INCLUDE="-I/fr_math/src" -OBJ=/tmp/FR_math.o - -SEP="============================================================" - -# Accumulate summary rows: "label|text_size" -SUMMARY="" - -print_sizes() { - local label="$1" - echo "" - echo "$SEP" - echo "$label" - echo "$SEP" - if [ ! -f "$OBJ" ]; then - echo " (build failed)" - SUMMARY="${SUMMARY}${label}|FAIL\n" - return - fi - OBJ_SIZE=$(stat -c%s "$OBJ" 2>/dev/null || wc -c < "$OBJ") - TEXT_SIZE=$(size "$OBJ" 2>/dev/null | awk 'FNR==2{print $1}') - printf " obj file size: %6s bytes\n" "$OBJ_SIZE" - printf " .text section: %6s bytes\n" "$TEXT_SIZE" - SUMMARY="${SUMMARY}${label}|${TEXT_SIZE}\n" - rm -f "$OBJ" -} - -echo "" -echo "FR_Math cross-compilation size report" -echo "Date: $(date -u '+%Y-%m-%d %H:%M UTC')" -echo "" - -# --- x86 --- -gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true -print_sizes "GCC x86-64" - -clang -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true -print_sizes "Clang x86-64" - -gcc -c $SRC $INCLUDE -Os -m32 -Wall -o $OBJ 2>&1 && true -print_sizes "GCC x86-32" - -tcc -c $SRC $INCLUDE -o $OBJ 2>&1 && true -print_sizes "TCC x86" - -# --- ARM --- -aarch64-linux-gnu-gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true -print_sizes "GCC AArch64 (ARM64)" - -arm-none-eabi-gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true -print_sizes "GCC ARM32" - -arm-none-eabi-gcc -c $SRC $INCLUDE -Os -mthumb -Wall -o $OBJ 2>&1 && true -print_sizes "GCC ARM32 Thumb" - -# --- MSP430 --- -# Bare-metal: no stdint.h in sysroot — use fallback typedefs -NOSTD="-DFR_NO_STDINT" - -msp430-gcc -c $SRC $INCLUDE $NOSTD -Os -Wall -o $OBJ 2>&1 && true -print_sizes "GCC MSP430" - -# --- AVR --- -avr-gcc -c $SRC $INCLUDE $NOSTD -Os -mmcu=avr5 -Wall -o $OBJ 2>&1 && true -print_sizes "GCC AVR5 (ATmega328P)" - -avr-gcc -c $SRC $INCLUDE $NOSTD -Os -mmcu=attiny85 -Wall -o $OBJ 2>&1 && true -print_sizes "GCC AVR ATtiny85" - -# --- 68HC11 --- -m68hc11-gcc -c $SRC $INCLUDE $NOSTD -Os -o $OBJ 2>&1 && true -print_sizes "GCC 68HC11" - -# --- 68k (Motorola 68000) --- -m68k-linux-gnu-gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true -print_sizes "GCC m68k" - -# --- PowerPC --- -powerpc-linux-gnu-gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true -print_sizes "GCC PowerPC" - -# --- RISC-V --- -riscv64-linux-gnu-gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 && true -print_sizes "GCC RISC-V (rv64)" - -riscv64-unknown-elf-gcc -c $SRC $INCLUDE $NOSTD -Os -march=rv32imac -mabi=ilp32 -Wall -o $OBJ 2>&1 && true -print_sizes "GCC RISC-V (rv32)" - -# --- Xtensa (ESP8266/ESP32 family) --- -xtensa-lx106-elf-gcc -c $SRC $INCLUDE $NOSTD -Os -Wall -o $OBJ 2>&1 && true -print_sizes "GCC Xtensa LX106 (ESP8266)" - -# --- Function size table (native GCC) --- -echo "" -echo "$SEP" -echo "Function size table (GCC x86-64)" -echo "$SEP" -gcc -c $SRC $INCLUDE -Os -Wall -o $OBJ 2>&1 -nm $OBJ -n -S --size-sort -f sysv -t d 2>/dev/null | grep -E "FUNC" || true -rm -f $OBJ - -# --- Summary table --- -echo "" -echo "$SEP" -echo "Summary: FR_math.c code size (bytes), compiled with -Os" -echo "$SEP" -printf " %-28s %s\n" "Target" ".text (bytes)" -printf " %-28s %s\n" "----------------------------" "-------------" -echo -e "$SUMMARY" | while IFS='|' read -r label size; do - [ -z "$label" ] && continue - printf " %-28s %s\n" "$label" "$size" -done - -echo "" -echo "Done." diff --git a/scripts/crossbuild_sizes.sh b/scripts/crossbuild_sizes.sh new file mode 100755 index 0000000..d6e1f85 --- /dev/null +++ b/scripts/crossbuild_sizes.sh @@ -0,0 +1,286 @@ +#!/usr/bin/env bash +# +# crossbuild_sizes.sh — cross-compile FR_math inside Docker, generate size +# tables, and optionally patch doc files. +# +# Usage: +# scripts/crossbuild_sizes.sh # build, print table, write CSV + MD +# scripts/crossbuild_sizes.sh --update # also patch doc files +# +# Requires: docker, xelp-crossbuild:latest image +# +# Output files: +# build/sizes.csv — raw CSV (target,width,lean,core,full) +# build/sizes.md — markdown table +# +# With --update, patches these files between sentinels: +# README.md — markdown table +# docs/building.md — markdown table +# pages/guide/building.html — HTML + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" +cd "${PROJECT_ROOT}" + +MODE="print" +for arg in "$@"; do + case "$arg" in + --update) MODE="update" ;; + -h|--help) + echo "Usage: scripts/crossbuild_sizes.sh [--update]" + echo " (no args) Build in Docker, print size table, write CSV + MD" + echo " --update Also patch README.md, docs/building.md, pages/guide/building.html" + exit 0 + ;; + *) echo "Unknown option: $arg" >&2; exit 1 ;; + esac +done + +# ----------------------------------------------------------------------- +# 1. Preflight checks +# ----------------------------------------------------------------------- + +if ! command -v docker >/dev/null 2>&1; then + echo "ERROR: docker not found. Install Docker first." >&2 + exit 1 +fi + +if ! docker image inspect xelp-crossbuild:latest >/dev/null 2>&1; then + echo "ERROR: Docker image 'xelp-crossbuild:latest' not found." >&2 + echo "Build it with: docker build -t xelp-crossbuild:latest scripts/" >&2 + exit 1 +fi + +mkdir -p build + +# ----------------------------------------------------------------------- +# 2. Run cross-compilation inside Docker +# ----------------------------------------------------------------------- + +echo "Running cross-compilation in Docker..." + +docker run --rm -v "${PROJECT_ROOT}:/fr_math" xelp-crossbuild:latest \ + /bin/bash -c ' +set -e + +SRC=/fr_math/src/FR_math.c +INCLUDE="-I/fr_math/src" +OBJ=/tmp/FR_math.o +CSV=/fr_math/build/sizes.csv + +LEAN_DEFS="-DFR_LEAN -DFR_NO_PRINT" +CORE_DEFS="-DFR_CORE_ONLY" +FULL_DEFS="" + +build_text_size() { + local compiler="$1" + local flags="$2" + local defs="$3" + rm -f "$OBJ" + if $compiler -c $SRC $INCLUDE $flags $defs -Os -Wall -o $OBJ 2>/dev/null; then + size "$OBJ" 2>/dev/null | awk "FNR==2{print \$1}" + else + echo "FAIL" + fi + rm -f "$OBJ" +} + +build_target() { + local label="$1" + local width="$2" + local compiler="$3" + local flags="$4" + + local lean_sz=$(build_text_size "$compiler" "$flags" "$LEAN_DEFS") + local core_sz=$(build_text_size "$compiler" "$flags" "$CORE_DEFS") + local full_sz=$(build_text_size "$compiler" "$flags" "$FULL_DEFS") + echo "${label},${width},${lean_sz},${core_sz},${full_sz}" >> "$CSV" +} + +# Write CSV header +echo "target,width,lean,core,full" > "$CSV" + +# --- 8-bit --- +NOSTD="-DFR_NO_STDINT" +build_target "AVR ATmega328P" 8 "avr-gcc" "$NOSTD -mmcu=avr5" +build_target "AVR ATtiny85" 8 "avr-gcc" "$NOSTD -mmcu=attiny85" +build_target "68HC11" 8 "m68hc11-gcc" "$NOSTD" + +# --- 16-bit --- +build_target "MSP430" 16 "msp430-gcc" "$NOSTD" + +# --- 32-bit --- +build_target "Cortex-M0 (RP2040)" 32 "arm-none-eabi-gcc" "-mcpu=cortex-m0 -mthumb" +build_target "Cortex-M4 (STM32)" 32 "arm-none-eabi-gcc" "-mcpu=cortex-m4 -mthumb" +build_target "ARM32" 32 "arm-none-eabi-gcc" "" +build_target "ARM Thumb" 32 "arm-none-eabi-gcc" "-mthumb" +build_target "RISC-V rv32" 32 "riscv64-unknown-elf-gcc" "$NOSTD -march=rv32imac -mabi=ilp32" +build_target "Xtensa LX106 (ESP8266)" 32 "xtensa-lx106-elf-gcc" "$NOSTD" +build_target "Xtensa LX7 (ESP32-S3)" 32 "xtensa-esp-elf-gcc" "" +build_target "m68k" 32 "m68k-linux-gnu-gcc" "" +build_target "PowerPC" 32 "powerpc-linux-gnu-gcc" "" +build_target "MIPS32" 32 "mipsel-linux-gnu-gcc" "" +build_target "x86-32" 32 "gcc" "-m32" +build_target "TCC x86" 32 "tcc" "" + +# --- 64-bit --- +build_target "RISC-V rv64" 64 "riscv64-linux-gnu-gcc" "" +build_target "x86-64 (GCC)" 64 "gcc" "" +build_target "x86-64 (Clang)" 64 "clang" "" +build_target "AArch64 (ARM64)" 64 "aarch64-linux-gnu-gcc" "" + +echo "Docker build complete — $(grep -c , "$CSV") rows written to build/sizes.csv" +' + +# ----------------------------------------------------------------------- +# 3. Generate tables on host +# ----------------------------------------------------------------------- + +CSV="build/sizes.csv" + +if [ ! -f "${CSV}" ]; then + echo "ERROR: ${CSV} not found after Docker run." >&2 + exit 1 +fi + +# Sort by width ascending, then full size ascending (skip header) +SORTED=$(tail -n +2 "${CSV}" | sort -t',' -k2,2n -k5,5n) + +if [ -z "${SORTED}" ]; then + echo "ERROR: No data rows in ${CSV}" >&2 + exit 1 +fi + +# Format bytes as X.X KB using integer math (no bc dependency) +fmt_kb() { + local val="$1" + if [[ "${val}" =~ ^[0-9]+$ ]]; then + local whole=$((val / 1024)) + local frac=$(( (val % 1024) * 10 / 1024 )) + echo "${whole}.${frac} KB" + else + echo "${val}" + fi +} + +# --- Console summary --- +echo "" +echo "============================================================" +echo "FR_math.c code size (.text bytes), compiled with -Os" +echo "Sorted by architecture width (8-bit → 64-bit)" +echo "============================================================" +echo "" +printf " %-28s %5s %8s %8s %8s\n" "Target" "Width" "Lean" "Core" "Full" +printf " %-28s %5s %8s %8s %8s\n" "----------------------------" "-----" "--------" "--------" "--------" +while IFS=',' read -r target width lean core full; do + printf " %-28s %4s-b %8s %8s %8s\n" "$target" "$width" "$lean" "$core" "$full" +done <<< "${SORTED}" +echo "" +echo "Lean = -DFR_LEAN -DFR_NO_PRINT (radian trig, inv trig, log/exp, sqrt)" +echo "Core = -DFR_CORE_ONLY (Lean + degree/BAM trig, log10, hypot)" +echo "Full = default (Core + print, waves, ADSR)" +echo "" + +# --- build/sizes.md --- +{ + echo "# FR_math.c Code Sizes (.text bytes, -Os)" + echo "" + echo "Sorted by architecture width (8-bit → 64-bit)." + echo "" + echo "| Target | Lean | Core | Full |" + echo "|--------|-----:|-----:|-----:|" + while IFS=',' read -r target width lean core full; do + printf "| %s | %s | %s | %s |\n" "$target" "$(fmt_kb "$lean")" "$(fmt_kb "$core")" "$(fmt_kb "$full")" + done <<< "${SORTED}" + echo "" + echo "**Lean** (\`-DFR_LEAN -DFR_NO_PRINT\`): radian trig, inv trig, log/exp, sqrt." + echo "**Core** (\`-DFR_CORE_ONLY\`): Lean + degree/BAM trig, log10, hypot." + echo "**Full** (default): Core + formatted print, wave generators, ADSR envelope." +} > build/sizes.md + +echo "Wrote build/sizes.csv and build/sizes.md" + +if [ "${MODE}" != "update" ]; then + exit 0 +fi + +# ----------------------------------------------------------------------- +# 4. Patch doc files +# ----------------------------------------------------------------------- + +# Build markdown replacement block (width column is for sorting only, omit from output) +MD_ROWS="" +while IFS=',' read -r target width lean core full; do + row="| ${target} | $(fmt_kb "${lean}") | $(fmt_kb "${core}") | $(fmt_kb "${full}") |" + if [ -n "${MD_ROWS}" ]; then + MD_ROWS+=$'\n' + fi + MD_ROWS+="${row}" +done <<< "${SORTED}" + +MD_TABLE=""$'\n' +MD_TABLE+="| Target | Lean | Core | Full |"$'\n' +MD_TABLE+="|--------|-----:|-----:|-----:|"$'\n' +MD_TABLE+="${MD_ROWS}"$'\n' +MD_TABLE+="" + +# Patch a markdown file between sentinels +patch_markdown() { + local file="$1" + if [ ! -f "$file" ]; then + echo " skip: $file not found" >&2 + return + fi + + perl -0777 -i -pe " + s{.*?} + {${MD_TABLE}}s + " "$file" + + echo " patched: $file" +} + +# Patch HTML file between sentinels +patch_html() { + local file="$1" + if [ ! -f "$file" ]; then + echo " skip: $file not found" >&2 + return + fi + + # Build HTML rows (skip width column) + local html_rows="" + while IFS=',' read -r target width lean core full; do + local tr="" + if [ -n "$html_rows" ]; then + html_rows+=$'\n' + fi + html_rows+="${tr}" + done <<< "${SORTED}" + + local replacement + replacement=""$'\n' + replacement+="
      ${target}$(fmt_kb "${lean}")$(fmt_kb "${core}")$(fmt_kb "${full}")
      "$'\n' + replacement+=""$'\n' + replacement+=""$'\n' + replacement+="${html_rows}"$'\n' + replacement+=""$'\n' + replacement+="
      TargetLeanCoreFull
      "$'\n' + replacement+="" + + perl -0777 -i -pe " + s{.*?} + {${replacement}}s + " "$file" + + echo " patched: $file" +} + +echo "" +echo "Patching doc files..." +patch_markdown "README.md" +patch_markdown "docs/building.md" +patch_html "pages/guide/building.html" +echo "Done." diff --git a/scripts/size_report.sh b/scripts/size_report.sh deleted file mode 100755 index 69c875f..0000000 --- a/scripts/size_report.sh +++ /dev/null @@ -1,142 +0,0 @@ -#!/bin/bash -# Enhanced size report for FR_Math library -# Builds for multiple architectures and displays a formatted table - -set -e - -# Colors for output -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' # No Color - -# Source and build directories -SRC_DIR="src" -BUILD_DIR="build" -TEMP_DIR="build/size_report" - -# Create temp directory for builds -mkdir -p "$TEMP_DIR" - -# Function to build and get size for an architecture -build_and_size() { - local arch=$1 - local compiler=$2 - local flags=$3 - local output_file="$TEMP_DIR/FR_math_${arch}.o" - - if command -v $compiler >/dev/null 2>&1; then - # Try to compile - if $compiler $flags -Isrc -Wall -Os -c $SRC_DIR/FR_math.c -o "$output_file" 2>/dev/null; then - # Get size in bytes - local size=$(wc -c < "$output_file" 2>/dev/null || echo "0") - echo "$size" - else - echo "fail" - fi - else - echo "n/a" - fi -} - -# Function to format number with commas -format_number() { - printf "%'d" $1 2>/dev/null || echo $1 -} - -echo "" -echo "=========================================" -echo " FR_Math Multi-Architecture Size Report" -echo "=========================================" -echo "" -echo "Building for all available architectures..." -echo "" - -# Build for each architecture -x86_32_size=$(build_and_size "x86-32" "gcc" "-m32") -x86_64_size=$(build_and_size "x86-64" "gcc" "-m64") -arm32_size=$(build_and_size "arm32" "arm-linux-gnueabihf-gcc" "") -arm64_size=$(build_and_size "arm64" "aarch64-linux-gnu-gcc" "") -# Bare-metal Cortex-M (Thumb) targets — toolchain is arm-none-eabi-gcc. -# Cortex-M0 = Thumb-1 (very dense, no DSP), Cortex-M4 = Thumb-2 (DSP, MAC). -cm0_size=$(build_and_size "cortex-m0" "arm-none-eabi-gcc" "-mcpu=cortex-m0 -mthumb --specs=nosys.specs") -cm4_size=$(build_and_size "cortex-m4" "arm-none-eabi-gcc" "-mcpu=cortex-m4 -mthumb --specs=nosys.specs") -m68k_size=$(build_and_size "m68k" "m68k-elf-gcc" "") -# RISC-V: try the bare-metal newlib toolchain first, fall back to elf names. -riscv32_size=$(build_and_size "riscv32" "riscv64-unknown-elf-gcc" "-march=rv32imc -mabi=ilp32") -if [ "$riscv32_size" = "n/a" ]; then - riscv32_size=$(build_and_size "riscv32" "riscv32-unknown-elf-gcc" "") -fi -riscv64_size=$(build_and_size "riscv64" "riscv64-unknown-elf-gcc" "-march=rv64imac -mabi=lp64") - -# Native build -native_arch=$(uname -m) -native_size=$(build_and_size "native" "gcc" "") - -# Print formatted table -printf "┌──────────────┬──────────────┬──────────┐\n" -printf "│ Architecture │ Compiler │ Size │\n" -printf "├──────────────┼──────────────┼──────────┤\n" - -# Function to print a row -print_row() { - local arch=$1 - local compiler=$2 - local size=$3 - - if [ "$size" = "n/a" ]; then - printf "│ %-12s │ %-12s │ %8s │\n" "$arch" "not found" " -" - elif [ "$size" = "fail" ]; then - printf "│ %-12s │ %-12s │ %8s │\n" "$arch" "error" " -" - elif [ "$size" = "0" ]; then - printf "│ %-12s │ %-12s │ %8s │\n" "$arch" "$compiler" " -" - else - printf "│ %-12s │ %-12s │ %'8d │\n" "$arch" "$compiler" "$size" - fi -} - -# Print each architecture -print_row "x86-32" "gcc -m32" "$x86_32_size" -print_row "x86-64" "gcc -m64" "$x86_64_size" -print_row "ARM32" "arm-gcc" "$arm32_size" -print_row "ARM64" "aarch64-gcc" "$arm64_size" -print_row "Cortex-M0" "arm-eabi-gcc" "$cm0_size" -print_row "Cortex-M4" "arm-eabi-gcc" "$cm4_size" -print_row "68k" "m68k-gcc" "$m68k_size" -print_row "RISC-V 32" "riscv32-gcc" "$riscv32_size" -print_row "RISC-V 64" "riscv64-gcc" "$riscv64_size" -printf "├──────────────┼──────────────┼──────────┤\n" -print_row "Native($native_arch)" "gcc" "$native_size" -printf "└──────────────┴──────────────┴──────────┘\n" - -# Optimization comparison for native -if [ "$native_size" != "n/a" ] && [ "$native_size" != "fail" ]; then - echo "" - echo "Optimization Comparison (Native $native_arch):" - echo "────────────────────────────────────────" - - os_size=$(gcc -Isrc -Wall -Os -c $SRC_DIR/FR_math.c -o "$TEMP_DIR/FR_math_Os.o" 2>/dev/null && wc -c < "$TEMP_DIR/FR_math_Os.o" || echo "0") - o2_size=$(gcc -Isrc -Wall -O2 -c $SRC_DIR/FR_math.c -o "$TEMP_DIR/FR_math_O2.o" 2>/dev/null && wc -c < "$TEMP_DIR/FR_math_O2.o" || echo "0") - o3_size=$(gcc -Isrc -Wall -O3 -c $SRC_DIR/FR_math.c -o "$TEMP_DIR/FR_math_O3.o" 2>/dev/null && wc -c < "$TEMP_DIR/FR_math_O3.o" || echo "0") - o0_size=$(gcc -Isrc -Wall -O0 -c $SRC_DIR/FR_math.c -o "$TEMP_DIR/FR_math_O0.o" 2>/dev/null && wc -c < "$TEMP_DIR/FR_math_O0.o" || echo "0") - - printf " -O0 (none): %'8d bytes\n" $o0_size - printf " -Os (size): %'8d bytes\n" $os_size - printf " -O2 (speed): %'8d bytes\n" $o2_size - printf " -O3 (max): %'8d bytes\n" $o3_size -fi - -echo "" -echo "Note: Install cross-compilers for more architectures:" -echo " Ubuntu/Debian:" -echo " sudo apt-get install gcc-multilib g++-multilib" -echo " sudo apt-get install gcc-arm-linux-gnueabihf" -echo " sudo apt-get install gcc-aarch64-linux-gnu" -echo " sudo apt-get install gcc-arm-none-eabi # Cortex-M (Thumb)" -echo " sudo apt-get install gcc-riscv64-unknown-elf" -echo " sudo apt-get install gcc-m68k-linux-gnu" -echo "" -echo " macOS (via brew):" -echo " brew install --cask gcc-arm-embedded # Cortex-M (Thumb)" -echo " brew install arm-gnu-toolchain" -echo " brew install riscv-gnu-toolchain" -echo "" \ No newline at end of file diff --git a/scripts/sync_version.sh b/scripts/sync_version.sh index 4a7f763..2a33525 100755 --- a/scripts/sync_version.sh +++ b/scripts/sync_version.sh @@ -18,8 +18,7 @@ # src/FR_math.h — FR_MATH_VERSION string (derived from _HEX) # VERSION — plain-text "X.Y.Z" (derived from _HEX) # README.md — shields.io version badge -# README.md — "Current version:" line -# pages/assets/site.js — FR_VERSION constant (docs page header) +# pages/version.json — {"version":"X.Y.Z","hex":"0xMMmmpp"} for site.js # src/FR_math_2D.h — @version doxygen tag # src/FR_math_2D.cpp — @version doxygen tag # library.properties — Arduino Library Manager version @@ -196,20 +195,29 @@ update_file "README.md version badge" "${PROJECT_ROOT}/README.md" \ "s|(img\\.shields\\.io/badge/version-)[0-9]+\\.[0-9]+\\.[0-9]+(-[a-z]+\\.svg)|\${1}${VERSION}\${2}|g" # -------------------------------------------------------------------------- -# 4. README.md — "Current version: X.Y.Z" line in the Version section +# 4. pages/version.json — machine-readable version for site.js +# site.js fetches this at runtime so no hardcoded version in JS. # -------------------------------------------------------------------------- -update_file "README.md Current version: line" "${PROJECT_ROOT}/README.md" \ - "s|(Current version: )[0-9]+\\.[0-9]+\\.[0-9]+|\${1}${VERSION}|g" - -# -------------------------------------------------------------------------- -# 5. pages/assets/site.js — FR_VERSION constant -# Pattern: var FR_VERSION = 'v2.0.0'; -# -------------------------------------------------------------------------- -update_file "pages/assets/site.js FR_VERSION" "${PROJECT_ROOT}/pages/assets/site.js" \ - "s|(var FR_VERSION = 'v)[0-9]+\\.[0-9]+\\.[0-9]+(';)|\${1}${VERSION}\${2}|g" +VER_JSON="${PROJECT_ROOT}/pages/version.json" +VER_JSON_WANT="{\"version\":\"${VERSION}\",\"hex\":\"${WANT_HEX}\"}" +VER_JSON_CUR="" +if [[ -f "${VER_JSON}" ]]; then + VER_JSON_CUR=$(cat "${VER_JSON}" | tr -d '[:space:]') +fi +VER_JSON_WANT_TRIMMED=$(echo "${VER_JSON_WANT}" | tr -d '[:space:]') +if [[ "${VER_JSON_CUR}" == "${VER_JSON_WANT_TRIMMED}" ]]; then + echo -e " ${GREEN}ok ${NC} pages/version.json" +elif [[ "${MODE}" == "check" ]]; then + echo -e " ${RED}DRIFT${NC} pages/version.json" + DRIFT=1 +else + echo "${VER_JSON_WANT}" > "${VER_JSON}" + echo -e " ${YELLOW}updated${NC} pages/version.json" + CHANGED=1 +fi # -------------------------------------------------------------------------- -# 6. src/FR_math_2D.h — @version doxygen tag +# 5. src/FR_math_2D.h — @version doxygen tag # -------------------------------------------------------------------------- update_file "src/FR_math_2D.h @version" "${PROJECT_ROOT}/src/FR_math_2D.h" \ "s|(\\@version )[0-9]+\\.[0-9]+\\.[0-9]+|\${1}${VERSION}|g" diff --git a/scripts/update_sizes.sh b/scripts/update_sizes.sh deleted file mode 100755 index b696edd..0000000 --- a/scripts/update_sizes.sh +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env bash -# -# update_sizes.sh — read build/sizes.csv and patch the size table into -# README.md, docs/building.md, and pages/guide/building.html. -# -# Usage: -# scripts/update_sizes.sh # print table to stdout -# scripts/update_sizes.sh --update # also patch the three doc files -# -# The table is delimited by sentinel comments: -# -# ... -# -# -# Exit status: 0 on success, non-zero on missing CSV or extraction failure. - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" -cd "${PROJECT_ROOT}" - -CSV="build/sizes.csv" -MODE="print" - -for arg in "$@"; do - case "$arg" in - --update) MODE="update" ;; - -h|--help) - echo "Usage: scripts/update_sizes.sh [--update]" - echo " (no args) Read build/sizes.csv, print size table" - echo " --update Also patch README.md, docs/building.md, pages/guide/building.html" - exit 0 - ;; - *) echo "Unknown option: $arg" >&2; exit 1 ;; - esac -done - -if [ ! -f "${CSV}" ]; then - echo "ERROR: ${CSV} not found. Run docker/build_sizes.sh first." >&2 - exit 1 -fi - -# ----------------------------------------------------------------------- -# 1. Read CSV and sort by width then full_bytes ascending -# ----------------------------------------------------------------------- - -# Skip header, sort numerically by field 2 (width) then field 4 (full_bytes) -SORTED=$(tail -n +2 "${CSV}" | sort -t',' -k2,2n -k4,4n) - -if [ -z "${SORTED}" ]; then - echo "ERROR: No data rows in ${CSV}" >&2 - exit 1 -fi - -# Build markdown data rows -MD_ROWS="" -while IFS=',' read -r target width core full; do - # Format bytes as X.X KB - fmt_kb() { - local val="$1" - if [[ "${val}" =~ ^[0-9]+$ ]]; then - awk "BEGIN { printf \"%.1f KB\", ${val}/1024.0 }" - else - echo "${val}" - fi - } - row="| ${target} | $(fmt_kb "${core}") | $(fmt_kb "${full}") |" - if [ -n "${MD_ROWS}" ]; then - MD_ROWS+=$'\n' - fi - MD_ROWS+="${row}" -done <<< "${SORTED}" - -# Build full markdown table -MD_TABLE=""$'\n' -MD_TABLE+="| Target | Core | Full |"$'\n' -MD_TABLE+="|--------|-----:|-----:|"$'\n' -MD_TABLE+="${MD_ROWS}"$'\n' -MD_TABLE+="" - -echo "${MD_TABLE}" - -if [ "${MODE}" != "update" ]; then - exit 0 -fi - -# ----------------------------------------------------------------------- -# 2. Patch markdown files -# ----------------------------------------------------------------------- -patch_markdown() { - local file="$1" - if [ ! -f "$file" ]; then - echo " skip: $file not found" >&2 - return - fi - - perl -0777 -i -pe " - s{.*?} - {${MD_TABLE}}s - " "$file" - - echo " patched: $file" >&2 -} - -patch_markdown "README.md" -patch_markdown "docs/building.md" - -# ----------------------------------------------------------------------- -# 3. Patch HTML file (pages/guide/building.html) -# ----------------------------------------------------------------------- -patch_html() { - local file="$1" - if [ ! -f "$file" ]; then - echo " skip: $file not found" >&2 - return - fi - - # Convert sorted CSV rows to HTML rows - local html_rows="" - while IFS=',' read -r target width core full; do - fmt_kb() { - local val="$1" - if [[ "${val}" =~ ^[0-9]+$ ]]; then - awk "BEGIN { printf \"%.1f KB\", ${val}/1024.0 }" - else - echo "${val}" - fi - } - local tr="${target}$(fmt_kb "${core}")$(fmt_kb "${full}")" - if [ -n "$html_rows" ]; then - html_rows+=$'\n' - fi - html_rows+="${tr}" - done <<< "${SORTED}" - - # Build the replacement block - local replacement - replacement=""$'\n' - replacement+=""$'\n' - replacement+=""$'\n' - replacement+=""$'\n' - replacement+="${html_rows}"$'\n' - replacement+=""$'\n' - replacement+="
      TargetCoreFull
      "$'\n' - replacement+="" - - perl -0777 -i -pe " - s{.*?} - {${replacement}}s - " "$file" - - echo " patched: $file" >&2 -} - -patch_html "pages/guide/building.html" - -echo "Size table updated in all doc files." >&2 diff --git a/src/FR_math.c b/src/FR_math.c index 181972e..95809f8 100644 --- a/src/FR_math.c +++ b/src/FR_math.c @@ -30,164 +30,646 @@ */ #include "FR_math.h" -#include "FR_trig_table.h" #ifndef FR_NO_STDINT #include #endif /*======================================================= - * BAM-native trig: fr_cos_bam, fr_sin_bam, fr_cos, fr_sin, fr_tan + * Trig lookup tables (inlined — no separate header needed) * - * Internal model: every angle is reduced to a u16 BAM value. The top 2 bits - * select the quadrant, the bottom 14 bits are the in-quadrant position. Odd - * quadrants (1, 3) reverse the in-quadrant index so the table is always read - * in the same direction. Quadrants 1 and 2 get their sign flipped at the - * end. - * - * Within each quadrant, the upper FR_TRIG_TABLE_BITS bits of the - * in-quadrant value index the table; the lower FR_TRIG_FRAC_BITS bits drive - * round-to-nearest linear interpolation between adjacent table entries. - * - * The last entry (table[FR_TRIG_TABLE_SIZE-1] = 0) means the - * interpolation at the very edge of the quadrant never reads out of bounds. - * - * Rounding: we interpolate as - * v = lo - ((d * frac + HALF) >> FRAC_BITS) - * where d = lo - hi (which is >= 0 because cos is monotonically decreasing - * on [0, pi/2]). Using the subtract form guarantees the argument of >> is - * always non-negative, so the behavior is portable C89 (no reliance on - * implementation-defined right-shift of negative integers) and the +HALF - * gives unambiguous round-half-up. Max error vs the true cos is ~1 LSB of - * s0.15 (~3e-5 absolute); mean error ~0 (no bias). + * Sine quadrant table: 129 entries covering [0, pi/2] in u0.15 format. + * Tangent octant table: 65 entries covering [0, pi/4] in u0.15 format. + * Generated by tools/coef-gen.py — do not hand-edit. */ -s32 fr_cos_bam(u16 bam) + +#define FR_TRIG_TABLE_BITS (7) +#define FR_TRIG_TABLE_SIZE ((1 << FR_TRIG_TABLE_BITS) + 1) + +#define FR_TRIG_FRAC_BITS (14 - FR_TRIG_TABLE_BITS) +#define FR_TRIG_FRAC_MAX (1 << FR_TRIG_FRAC_BITS) +#define FR_TRIG_FRAC_MASK (FR_TRIG_FRAC_MAX - 1) +#define FR_TRIG_FRAC_HALF (FR_TRIG_FRAC_MAX >> 1) +#define FR_TRIG_QUADRANT (1 << 14) + +static const unsigned short gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE] = { + 0, 402, 804, 1206, 1608, 2009, 2411, 2811, + 3212, 3612, 4011, 4410, 4808, 5205, 5602, 5998, + 6393, 6787, 7180, 7571, 7962, 8351, 8740, 9127, + 9512, 9896, 10279, 10660, 11039, 11417, 11793, 12167, + 12540, 12910, 13279, 13646, 14010, 14373, 14733, 15091, + 15447, 15800, 16151, 16500, 16846, 17190, 17531, 17869, + 18205, 18538, 18868, 19195, 19520, 19841, 20160, 20475, + 20788, 21097, 21403, 21706, 22006, 22302, 22595, 22884, + 23170, 23453, 23732, 24008, 24279, 24548, 24812, 25073, + 25330, 25583, 25833, 26078, 26320, 26557, 26791, 27020, + 27246, 27467, 27684, 27897, 28106, 28311, 28511, 28707, + 28899, 29086, 29269, 29448, 29622, 29792, 29957, 30118, + 30274, 30425, 30572, 30715, 30853, 30986, 31114, 31238, + 31357, 31471, 31581, 31686, 31786, 31881, 31972, 32058, + 32138, 32214, 32286, 32352, 32413, 32470, 32522, 32568, + 32610, 32647, 32679, 32706, 32729, 32746, 32758, 32766, + 32768 +}; + +#define FR_TAN_TABLE_BITS (6) +#define FR_TAN_TABLE_SIZE ((1 << FR_TAN_TABLE_BITS) + 1) +#define FR_TAN_FRAC_BITS (13 - FR_TAN_TABLE_BITS) +#define FR_TAN_FRAC_MAX (1 << FR_TAN_FRAC_BITS) +#define FR_TAN_FRAC_MASK (FR_TAN_FRAC_MAX - 1) +#define FR_TAN_FRAC_HALF (FR_TAN_FRAC_MAX >> 1) +#define FR_TAN_OCTANT (1 << 13) + +static const unsigned short gFR_TAN_TAB_O[FR_TAN_TABLE_SIZE] = { + 0, 402, 804, 1207, 1610, 2013, 2417, 2822, + 3227, 3634, 4042, 4450, 4861, 5272, 5686, 6101, + 6518, 6937, 7358, 7782, 8208, 8637, 9068, 9503, + 9940, 10381, 10825, 11273, 11725, 12180, 12640, 13104, + 13573, 14046, 14525, 15009, 15498, 15993, 16494, 17001, + 17515, 18035, 18563, 19098, 19640, 20191, 20750, 21318, + 21895, 22481, 23078, 23685, 24302, 24931, 25572, 26226, + 26892, 27572, 28266, 28975, 29699, 30440, 31198, 31973, + 32768 +}; + +/*======================================================= + * Full-precision radian/degree → BAM conversion helpers + * + * rad_to_bam_full(r) returns a full s32 BAM value where: + * upper 16 bits = integer BAM (the u16 table index) + * lower 16 bits = sub-BAM fractional part + * Input r must already be normalized to radix 16 and reduced to [-pi, pi]. + * + * The shift terms match FR_RAD2BAM (10 terms, ~21-bit accuracy) but are + * reordered so intermediate sums stay within s32 for |r| <= pi at r16. + */ +static s32 rad_to_bam_full(s32 r) { - u32 q = ((u32)bam >> 14) & 0x3; /* top 2 bits = quadrant */ - u32 inq = (u32)bam & (FR_TRIG_QUADRANT - 1); /* bottom 14 bits */ - u32 idx, frac; - s32 lo, hi, d, v; + /* 10 terms: 65536/(2*pi) ≈ 10430.37835... + * 2^13 + 2^11 + 2^7 + 2^6 - 2 + 0.5 - 0.125 + 2^-8 - 2^-11 - 2^-14 + * = 10430.378357 (~21-bit accuracy) + * Terms reordered: interleave negatives early to keep all intermediate + * sums within s32 for |r| <= pi at r16 (max result ≈ 2^31 - 4K). */ + return (r<<13)-(r<<1)+(r<<11)-(r>>3)+(r<<7)+(r<<6)+(r>>1)+(r>>8)-(r>>11)-(r>>14); +} - /* Exact cardinal angles: bam=0 → 1.0, bam=16384 → 0, etc. */ - if (inq == 0) - { - if (q == 0) return FR_TRIG_ONE; /* 0° → 1.0 */ - if (q == 2) return -FR_TRIG_ONE; /* 180° → -1.0 */ - return 0; /* 90° or 270° → 0 */ - } +#ifndef FR_LEAN +/* deg_to_bam_full(d) — same idea for degrees. + * Input d must already be normalized to radix 16 and reduced to [-90, 90). + * Returns full s32 BAM (upper 16 = integer BAM, lower 16 = sub-BAM). + * 7 terms, ~18-bit accuracy matching FR_DEG2BAM. */ +static s32 deg_to_bam_full(s32 d) +{ + return (d<<7)+(d<<6)-(d<<3)-(d<<1)+(d>>5)+(d>>6)-(d>>9); +} +#endif - if (q == 1 || q == 3) - inq = FR_TRIG_QUADRANT - inq; /* mirror across pi/2 */ +/* Normalize a fixed-radix value to radix 16. */ +static s32 normalize_to_r16(s32 val, u16 radix) +{ + return (radix > 16) ? (val >> (radix - 16)) + : (radix < 16) ? (val << (16 - radix)) + : val; +} - idx = inq >> FR_TRIG_FRAC_BITS; /* table index [0..SIZE-1] */ - frac = inq & FR_TRIG_FRAC_MASK; /* interp fraction */ - lo = gFR_COS_TAB_Q[idx]; - hi = gFR_COS_TAB_Q[idx + 1]; - d = lo - hi; /* >= 0: cos monotonic */ - v = lo - (((d * (s32)frac) + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS); +/* Reduce non-negative radian (at r16) to [0, 2*pi). */ +static s32 reduce_to_2pi(s32 r) +{ + const s32 two_pi = FR_TWO_PI(16); /* 411775 */ + if (r > (two_pi << 1)) + r -= (r / two_pi) * two_pi; + else if (r > two_pi) + r -= two_pi; + return r; +} - /* Shift s0.15 → s15.16 */ - v <<= 1; - return (q == 1 || q == 2) ? -v : v; +/* rad_r16_to_bam — convert radian (at r16) in [0, 2π) to u16 BAM. + * Uses quadrant decomposition to keep rad_to_bam_full in its safe + * [-π/2, π/2) range, mirroring the approach in fr_deg_to_bam. */ +static u16 rad_r16_to_bam(s32 r) +{ + const s32 half_pi = FR_HALF_PI(16); /* 102944 */ + const s32 three_half_pi = FR_THREE_HALF_PI(16); /* 308831 */ + const s32 pi = FR_PI(16); /* 205887 */ + const s32 two_pi = FR_TWO_PI(16); /* 411775 */ + + u16 offset = 0; + if (r >= half_pi && r < three_half_pi) { + r -= pi; + offset = 0x8000u; + } else if (r >= three_half_pi) { + r -= two_pi; + /* r is now in [-π/2, 0), no offset needed (u16 wraps naturally) */ + } + return (u16)(offset + (u16)((rad_to_bam_full(r) + (1 << 15)) >> 16)); } -s32 fr_sin_bam(u16 bam) +/* (rad_r16_to_bam32 removed — sub-BAM interpolation approach abandoned) */ + +/* fr_rad_to_bam — overflow-safe radian to u16 BAM conversion. + * Normalizes to r16, reduces to [0, 2π), uses quadrant decomposition. */ +u16 fr_rad_to_bam(s32 rad, u16 radix) { - /* sin(x) = cos(x - pi/2) = cos(bam - 16384). The u16 wraparound makes - * this completely free. - */ - return fr_cos_bam((u16)(bam - FR_BAM_QUADRANT)); + s32 r = normalize_to_r16(rad, radix); + /* Normalize to [0, 2π) */ + if (r < 0) { + r += ((-r) / FR_TWO_PI(16)) * FR_TWO_PI(16); + if (r < 0) r += FR_TWO_PI(16); + } + r = reduce_to_2pi(r); + return rad_r16_to_bam(r); +} + +#ifndef FR_LEAN +/* fr_deg_to_bam — overflow-safe degree to u16 BAM conversion. + * Normalizes to r16, reduces to [-90, 90) with quadrant offset. */ +u16 fr_deg_to_bam(s32 deg, u16 radix) +{ + s32 d = normalize_to_r16(deg, radix); + + /* Reduce to [-180, 180) */ + if (d >= FR_D360_R16 || d < -FR_D360_R16) { + s32 n = d / FR_D360_R16; + d -= n * FR_D360_R16; + } + if (d >= FR_D180_R16) d -= FR_D360_R16; + if (d < -FR_D180_R16) d += FR_D360_R16; + + /* Reduce to [-90, 90) with BAM quadrant offset */ + u16 offset = 0; + if (d >= FR_D90_R16) { d -= FR_D180_R16; offset = 32768; } + else if (d < -FR_D90_R16) { d += FR_D180_R16; offset = 32768; } + + return (u16)(offset + (u16)((deg_to_bam_full(d) + (1 << 15)) >> 16)); } +#endif -/* Convert radians at given radix to BAM with rounding. - * One radian = 65536 / (2*pi) ≈ 10430.378 BAM units. - * We use the more precise scaled constant 10430378 / 1000 to keep error - * bounded across a wide range of radians. +/*======================================================= + * BAM-native trig: fr_sin_bam, fr_cos_bam, fr_cos, fr_sin, fr_tan + * + * Internal model: every angle is reduced to a u16 BAM value. The top 2 bits + * select the quadrant, the bottom 14 bits are the in-quadrant position. Odd + * quadrants (1, 3) reverse the in-quadrant index so the table is always read + * in the same direction. + * + * The table is a 129-entry SINE quadrant (ascending: 0 at index 0, 32768 at + * index 128). After mirroring, small full_pos → small output (near zero), + * which enables a cheap small-angle approximation: sin(θ) ≈ θ for angles + * below one table step (~0.7°). This eliminates table quantization error + * in the region where it matters most. + * + * Sign rule: quadrants 2 and 3 negate the result. + * Mirror rule: quadrants 1 and 3 flip the in-quadrant position. */ -static u16 fr_rad_to_bam(s32 rad, u16 radix) +s32 fr_sin_bam(u16 bam) { - int64_t scaled = ((int64_t)rad * 10430378LL) / 1000; - if (radix > 0) - scaled >>= radix; - return (u16)((u32)scaled & 0xffff); + u32 q = ((u32)bam >> 14) & 0x3; /* top 2 bits = quadrant */ + u32 inq = (u32)bam & (FR_TRIG_QUADRANT - 1); /* bottom 14 bits */ + + /* Exact cardinal angles */ + if (inq == 0) { + if (q == 0 || q == 2) return 0; /* 0° or 180° → 0 */ + if (q == 1) return FR_TRIG_ONE; /* 90° → 1.0 */ + return -FR_TRIG_ONE; /* 270° → -1.0 */ + } + + /* Odd quadrants mirror: read table from the far end */ + if (q == 1 || q == 3) + inq = FR_TRIG_QUADRANT - inq; + + s32 v; + + /* Small-angle approximation: sin(θ) ≈ θ for inq < 128 (one table step). + * θ_rad = inq * (π/2) / 16384. Output = θ * 65536 = inq * FR_kQ2RAD / 16384. + * Max inq=127: 127 * 102944 / 16384 = 798. Error: θ³/6 < 3e-7 << 1 LSB. */ + if (inq < FR_TRIG_FRAC_MAX) { + v = (s32)(((u32)inq * 102944u + 8192u) >> 14); + } else { + /* Table lookup with 7-bit interpolation fraction */ + u32 idx = inq >> FR_TRIG_FRAC_BITS; + u32 frac = inq & FR_TRIG_FRAC_MASK; + s32 lo = (s32)gFR_SIN_TAB_Q[idx]; + s32 hi = (s32)gFR_SIN_TAB_Q[idx + 1]; + v = lo + (((hi - lo) * (s32)frac + FR_TRIG_FRAC_HALF) >> FR_TRIG_FRAC_BITS); + v <<= 1; /* u0.15 → s15.16 */ + } + + return (q >= 2) ? -v : v; } -s32 fr_cos(s32 rad, u16 radix) +s32 fr_cos_bam(u16 bam) { - return fr_cos_bam(fr_rad_to_bam(rad, radix)); + /* cos(x) = sin(x + pi/2) = sin(bam + 16384). u16 wraparound is free. */ + return fr_sin_bam((u16)(bam + FR_BAM_QUADRANT)); } -s32 fr_sin(s32 rad, u16 radix) +s32 fr_cos(s32 rad, u16 radix) { - return fr_sin_bam(fr_rad_to_bam(rad, radix)); + if (rad == 0) return FR_TRIG_ONE; + s32 r = normalize_to_r16(rad, radix); + if (r < 0) r = -r; + r = reduce_to_2pi(r); + /* Near π/2 or 3π/2 (cos=0 crossings): cos(π/2+δ) = -sin(δ) ≈ -δ, + * cos(3π/2+δ) = sin(δ) ≈ δ. */ + s32 delta = r - FR_HALF_PI(16); + if (delta >= -256 && delta <= 256) + return -delta; + delta = r - FR_THREE_HALF_PI(16); + if (delta >= -256 && delta <= 256) + return delta; + return fr_cos_bam(fr_rad_to_bam(rad, radix)); } -/* fr_tan: returns sin/cos at s15.16 (radix 16). Saturates if cos is near zero. */ -s32 fr_tan(s32 rad, u16 radix) +s32 fr_sin(s32 rad, u16 radix) { - u16 bam = fr_rad_to_bam(rad, radix); - s32 s = fr_sin_bam(bam); - s32 c = fr_cos_bam(bam); - if (c == 0) - return (s >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; - return (s32)(((int64_t)s << FR_TRIG_OUT_PREC) / c); + if (rad == 0) return 0; + s32 r = normalize_to_r16(rad, radix); + s32 sign = 1; + if (r < 0) { r = -r; sign = -1; } + r = reduce_to_2pi(r); + /* Near 0 after reduction: sin(δ) ≈ δ */ + if (r < 256) { + s32 v = r; + return (sign < 0) ? -v : v; + } + /* Near π: sin(π + δ) = -sin(δ) ≈ -δ */ + s32 delta = r - FR_PI(16); + if (delta >= -256 && delta <= 256) { + s32 v = -delta; + return (sign < 0) ? -v : v; + } + /* Near 2π: sin(2π - δ) = -sin(δ) ≈ -δ, but δ = 2π - r */ + delta = FR_TWO_PI(16) - r; + if (delta >= 0 && delta < 256) { + s32 v = -delta; + return (sign < 0) ? -v : v; + } + /* Main path: reduce to [-π, π], convert to u16 BAM, table lookup */ + if (r > FR_PI(16)) r -= FR_TWO_PI(16); + u16 bam = (u16)((rad_to_bam_full(r) + (1 << 15)) >> 16); + s32 v = fr_sin_bam(bam); + return (sign < 0) ? -v : v; } +#ifndef FR_LEAN /*======================================================= - * Integer-degree and fixed-radix-degree trig wrappers + * BAM-native tangent: fr_tan_bam * - * FR_CosI / FR_SinI are macros in the header (zero cost). The fixed-radix - * variants here convert s.r degrees to BAM in one shot using a precomputed - * reciprocal of 360 to avoid division on multiply-poor cores like 8051. + * Uses a 65-entry octant table (gFR_TAN_TAB_O) for the first octant + * [0, 45°] and the reciprocal identity tan(x) = 1/tan(90°-x) for the + * second octant (45°, 90°). Result is s15.16 with saturation at the + * poles. * - * Math: bam = deg * (65536 / 360) = deg * 182.0444... - * In s.16 fixed point: 65536 / 360 = 0xB60B (rounded). So - * bam_u16 = (deg_s.r * 0xB60B) >> r - * gives bam in u16 BAM units. The constant 0xB60B contains the divide by - * 360 baked in; the shift `>> r` strips the input radix. + * No 64-bit intermediates. One 32-bit division only in the >45° path. */ -static u16 fr_deg_radix_to_bam(s16 deg, u16 radix) +s32 fr_tan_bam(u16 bam) { - /* 0xB60B ≈ (65536/360) * 256 — the ×256 prescale keeps 32-bit math - * friendly to 8051-class MCUs. We must shift out both the input - * fraction bits (radix) AND the 8-bit prescale, hence radix + 8. - * The +half term rounds to nearest, matching FR_DEG2BAM behaviour. - */ - s32 v = (s32)deg * 0xB60BL; - u16 shift = radix + 8; - return (u16)((u32)((v + (1L << (shift - 1))) >> shift) & 0xffff); + u32 q = ((u32)bam >> 14) & 0x3; /* quadrant (top 2 bits) */ + u32 inq = (u32)bam & 0x3FFFu; /* in-quadrant (14 bits) */ + s32 sign = 1; + u32 idx, frac; + s32 lo, hi, raw; + + /* Exact zeros: bam lands exactly on 0° or 180° */ + if (inq == 0 && (q == 0 || q == 2)) + return 0; + + /* Poles: bam lands exactly on 90° or 270° */ + if (inq == 0 && (q == 1 || q == 3)) + return (q == 1) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + + /* Q1 (90°..180°) and Q3 (270°..360°): reflect and negate */ + if (q == 1 || q == 3) { + inq = 0x4000u - inq; + sign = -1; + } + + /* Now inq is in (0, 0x4000) = (0°, 90°) exclusive. + * Split into first octant [0, 45°) and second octant [45°, 90°). */ + if (inq < FR_TAN_OCTANT) { + /* First octant: direct table lookup + lerp. + * inq is 13 bits; top FR_TAN_TABLE_BITS index the table, + * bottom FR_TAN_FRAC_BITS drive interpolation. */ + idx = inq >> FR_TAN_FRAC_BITS; + frac = inq & FR_TAN_FRAC_MASK; + lo = (s32)gFR_TAN_TAB_O[idx]; + hi = (s32)gFR_TAN_TAB_O[idx + 1]; + raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + + if (raw < 0x40) { + /* Near zero: redo interpolation with 4 extra bits of + * precision to reduce rounding error when result is small. */ + s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; + s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; + raw = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + raw = (raw + 4) >> 3; /* u0.19 → s15.16 with rounding */ + } else { + raw <<= 1; /* u0.15 → s15.16 */ + } + } else { + /* Second octant: tan(x) = 1 / tan(90° - x). + * complement is in (0, 0x2000] = (0°, 45°]. */ + u32 comp = 0x4000u - inq; + + /* Look up tan(complement) from the table */ + idx = comp >> FR_TAN_FRAC_BITS; + frac = comp & FR_TAN_FRAC_MASK; + lo = (s32)gFR_TAN_TAB_O[idx]; + hi = (s32)gFR_TAN_TAB_O[idx + 1]; + raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + + if (raw < 0x40) { + /* Near pole: redo interpolation with 4 extra bits of + * precision. The reciprocal amplifies small interpolation + * errors, so extra precision significantly helps here. + * Result: (2^31 / raw_hp) << 4 = 2^35 / raw_hp. */ + s32 lo4 = (s32)gFR_TAN_TAB_O[idx] << 4; + s32 hi4 = (s32)gFR_TAN_TAB_O[idx + 1] << 4; + s32 raw_hp = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + if (raw_hp < 32) { + raw = FR_TRIG_MAXVAL; + } else { + raw = (s32)((0x80000000u / (u32)raw_hp) << 4); + } + } else { + raw = (s32)(0x80000000u / (u32)raw); + } + } + + return (sign < 0) ? -raw : raw; } +#endif /* FR_LEAN */ -s32 FR_Cos(s16 deg, u16 radix) +/* fr_tan — radian-input tangent. + * + * Normalize to [0, 2π], extract quadrant sign, convert rad→u16 BAM, + * then do direct octant table lookup + interpolation inline. + * Small-angle bypass at zero crossings: tan(x) ≈ x. + * Near poles: use radian distance directly (cot(δ) ≈ 1/δ) to avoid + * BAM quantization error amplified by the reciprocal. */ +s32 fr_tan(s32 rad, u16 radix) { - return fr_cos_bam(fr_deg_radix_to_bam(deg, radix)); + if (rad == 0) return 0; + s32 r = normalize_to_r16(rad, radix); + + /* tan(-x) = -tan(x): extract sign, work with |r| */ + s32 sign = 1; + if (r < 0) { r = -r; sign = -1; } + r = reduce_to_2pi(r); + + /* Small-angle bypass at zero crossings: tan(δ) ≈ δ */ + if (r < 256) + return (sign < 0) ? -r : r; + { + s32 delta = r - FR_PI(16); + if (delta >= -256 && delta <= 256) + return (sign < 0) ? -delta : delta; + } + { + s32 delta = FR_TWO_PI(16) - r; + if (delta >= 0 && delta < 256) + return (sign < 0) ? delta : -delta; + } + + /* Near-pole bypass: within POLE_THRESH r16 of π/2 or 3π/2, + * use cot(δ) ≈ 1/δ from the radian distance directly. + * Compute δ at r24 using precise pole constants (8× less rounding + * error than the r16 FR_HALF_PI/FR_THREE_HALF_PI constants). + * At δ=2048 r16 (1.79°), 1/δ error is ~0.03%. */ + { + const s32 pole_thresh = 2048; /* r16 units (~1.79°) */ + /* Precise pole positions at r24: + * π/2 × 2^24 = 26353589.76 → 26353590 + * 3π/2 × 2^24 = 79060769.28 → 79060769 */ + const s32 half_pi_r24 = 26353590; + const s32 three_half_pi_r24 = 79060769; + + s32 d1 = r - FR_HALF_PI(16); /* coarse check at r16 */ + s32 d2 = r - FR_THREE_HALF_PI(16); + s32 pole_delta_r24 = 0; + + if (d1 >= -pole_thresh && d1 <= pole_thresh) { + s32 r24 = r << 8; + s32 dd = r24 - half_pi_r24; + pole_delta_r24 = (dd < 0) ? -dd : dd; + } else if (d2 >= -pole_thresh && d2 <= pole_thresh) { + s32 r24 = r << 8; + s32 dd = r24 - three_half_pi_r24; + pole_delta_r24 = (dd < 0) ? -dd : dd; + } + + if (pole_delta_r24 > 0) { + /* Determine sign from radian quadrant */ + s32 pole_sign; + if (r < FR_HALF_PI(16)) + pole_sign = 1; /* before π/2: → +∞ */ + else if (r < FR_PI(16)) + pole_sign = -1; /* past π/2: → -∞ */ + else if (r <= FR_THREE_HALF_PI(16)) + pole_sign = 1; /* before 3π/2: → +∞ */ + else + pole_sign = -1; /* past 3π/2: → -∞ */ + + s32 raw; + if (pole_delta_r24 < 512) { + raw = FR_TRIG_MAXVAL; /* δ < 2 at r16 → saturate */ + } else { + /* cot(δ) ≈ 1/δ. In s15.16: (2^40) / δ_r24 */ + raw = (s32)((1ULL << 40) / (u32)pole_delta_r24); + if (raw > FR_TRIG_MAXVAL) raw = FR_TRIG_MAXVAL; + } + s32 v = (pole_sign < 0) ? -raw : raw; + return (sign < 0) ? -v : v; + } + } + + /* Convert radian to u16 BAM */ + u16 bam = rad_r16_to_bam(r); + + /* Decompose BAM into quadrant + in-quadrant */ + u32 q = ((u32)bam >> 14) & 0x3; + u32 inq = (u32)bam & 0x3FFFu; + s32 tsign = 1; /* tan sign from quadrant */ + + /* Exact zeros: bam lands on 0° or 180° */ + if (inq == 0 && (q == 0 || q == 2)) + return 0; + + /* Q1/Q3: reflect and negate */ + if (q == 1 || q == 3) { + inq = 0x4000u - inq; + tsign = -1; + } + + /* Octant table lookup + interpolation (same logic as fr_tan_bam) */ + u32 idx, frac; + s32 raw; + + if (inq < FR_TAN_OCTANT) { + /* First octant [0°, 45°): direct lookup */ + idx = inq >> FR_TAN_FRAC_BITS; + frac = inq & FR_TAN_FRAC_MASK; + s32 lo = (s32)gFR_TAN_TAB_O[idx]; + s32 hi = (s32)gFR_TAN_TAB_O[idx + 1]; + raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + + if (raw < 0x40) { + s32 lo4 = lo << 4; + s32 hi4 = hi << 4; + raw = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + raw = (raw + 4) >> 3; + } else { + raw <<= 1; + } + } else { + /* Second octant [45°, 90°): reciprocal identity */ + u32 comp = 0x4000u - inq; + idx = comp >> FR_TAN_FRAC_BITS; + frac = comp & FR_TAN_FRAC_MASK; + s32 lo = (s32)gFR_TAN_TAB_O[idx]; + s32 hi = (s32)gFR_TAN_TAB_O[idx + 1]; + raw = lo + (((hi - lo) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + + if (raw < 0x40) { + s32 lo4 = lo << 4; + s32 hi4 = hi << 4; + s32 raw_hp = lo4 + (((hi4 - lo4) * (s32)frac + FR_TAN_FRAC_HALF) >> FR_TAN_FRAC_BITS); + if (raw_hp < 32) + raw = FR_TRIG_MAXVAL; + else + raw = (s32)((0x80000000u / (u32)raw_hp) << 4); + } else { + raw = (s32)(0x80000000u / (u32)raw); + } + } + + /* Combine quadrant sign and input sign */ + s32 v = (tsign < 0) ? -raw : raw; + return (sign < 0) ? -v : v; +} + +#ifndef FR_LEAN +/*======================================================= + * Degree-input trig: convert to u16 BAM via fr_deg_to_bam, then + * call the BAM-native functions. Cardinal angles are exact. + */ + +s32 fr_cos_deg(s32 deg, u16 radix) +{ + if (radix == 0) return fr_cos_bam(FR_DEG2BAM_I(deg)); + if (deg < 0) deg = -deg; + /* Exact cardinal angles */ + s32 frac_mask = (1 << radix) - 1; + if ((deg & frac_mask) == 0) { + s32 rem = (deg >> radix) % 360; + if (rem == 0) return FR_TRIG_ONE; + if (rem == 90) return 0; + if (rem == 180) return -FR_TRIG_ONE; + if (rem == 270) return 0; + } + /* Near 90° or 270° (cos=0 crossings): cos(90+δ) = -sin(δ) ≈ -δ·π/180, + * cos(270+δ) = sin(δ) ≈ δ·π/180. Avoids BAM rounding error at zero. */ + s32 d = normalize_to_r16(deg, radix); + if (d >= FR_D360_R16) { s32 n = d / FR_D360_R16; d -= n * FR_D360_R16; } + { + const s32 DEG_THRESH = 14000; /* ~0.21° at r16 */ + s32 delta = d - FR_D90_R16; + if (delta >= -DEG_THRESH && delta <= DEG_THRESH) { + s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16); + return -dr; + } + delta = d - (FR_D90_R16 + FR_D180_R16); + if (delta >= -DEG_THRESH && delta <= DEG_THRESH) { + s32 dr = (s32)(((s64)delta * FR_kDEG2RAD + (1 << 15)) >> 16); + return dr; + } + } + return fr_cos_bam(fr_deg_to_bam(deg, radix)); } -s32 FR_Sin(s16 deg, u16 radix) +s32 fr_sin_deg(s32 deg, u16 radix) { - return fr_sin_bam(fr_deg_radix_to_bam(deg, radix)); + if (radix == 0) return fr_sin_bam(FR_DEG2BAM_I(deg)); + s32 sign = 1; + if (deg < 0) { deg = -deg; sign = -1; } + /* Exact cardinal angles */ + s32 frac_mask = (1 << radix) - 1; + if ((deg & frac_mask) == 0) { + s32 rem = (deg >> radix) % 360; + if (rem == 0) return 0; + if (rem == 90) return (sign < 0) ? -FR_TRIG_ONE : FR_TRIG_ONE; + if (rem == 180) return 0; + if (rem == 270) return (sign < 0) ? FR_TRIG_ONE : -FR_TRIG_ONE; + } + s32 v = fr_sin_bam(fr_deg_to_bam(deg, radix)); + return (sign < 0) ? -v : v; } -s32 FR_TanI(s16 deg) +s32 FR_TanI(s32 deg) { - u16 bam = FR_DEG2BAM(deg); - s32 s = fr_sin_bam(bam); - s32 c = fr_cos_bam(bam); - if (c == 0) - return (s >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; - return (s32)(((int64_t)s << FR_TRIG_OUT_PREC) / c); + /* Exact pole: deg mod 180 == ±90. Sign matches input sign. */ + s32 rem = deg % 180; + if (rem == 90 || rem == -90) + return (deg > 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + return fr_tan_bam(FR_DEG2BAM_I(deg)); } -s32 FR_Tan(s16 deg, u16 radix) +s32 fr_tan_deg(s32 deg, u16 radix) { - u16 bam = fr_deg_radix_to_bam(deg, radix); - s32 s = fr_sin_bam(bam); - s32 c = fr_cos_bam(bam); - if (c == 0) - return (s >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; - return (s32)(((int64_t)s << FR_TRIG_OUT_PREC) / c); + if (radix == 0) return FR_TanI(deg); + s32 deg_orig = deg; + /* Normalize to [0, 360°) at caller radix */ + s32 d360 = 360 << radix; + if (deg < 0) { + deg += ((-deg) / d360) * d360; + if (deg < 0) deg += d360; + } + if (deg >= d360) { + deg -= (deg / d360) * d360; + } + /* Exact cardinal angles */ + s32 frac_mask = (1 << radix) - 1; + if ((deg & frac_mask) == 0) { + s32 ideg = deg >> radix; + if (ideg == 0 || ideg == 180) return 0; + if (ideg == 90 || ideg == 270) + return (deg_orig >= 0) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + } + /* Near 0° or 180° (tan=0 crossings): tan(δ) ≈ δ in radians */ + s32 d = normalize_to_r16(deg, radix); + { + const s32 DEG_THRESH = 14000; /* ~0.21° at r16 */ + s32 delta; + /* Near 0° */ + if (d < DEG_THRESH) { + s32 up = d << 8; + return (FR_DEG2RAD(up) + (1 << 7)) >> 8; + } + /* Near 180° */ + delta = d - FR_D180_R16; + if (delta >= -DEG_THRESH && delta <= DEG_THRESH) { + s32 up = delta << 8; + return (FR_DEG2RAD(up) + (1 << 7)) >> 8; + } + /* Near 360° */ + delta = FR_D360_R16 - d; + if (delta >= 0 && delta < DEG_THRESH) { + s32 up = delta << 8; + return -((FR_DEG2RAD(up) + (1 << 7)) >> 8); + } + } + /* Main path: convert to u16 BAM, table lookup */ + u16 bam = fr_deg_to_bam(deg, radix); + s32 v = fr_tan_bam(bam); + /* Near-pole BAM alias: determine sign from normalized angle position */ + if (bam == 0x4000u || bam == 0xC000u) { + s32 pole_d = (bam == 0x4000u) ? FR_D90_R16 : (FR_D90_R16 + FR_D180_R16); + v = (d < pole_d) ? FR_TRIG_MAXVAL : -FR_TRIG_MAXVAL; + } + return v; } +#endif /* FR_LEAN */ /*======================================================= * FR_FixMuls (x*y signed, NOT saturated, round-to-nearest) @@ -250,11 +732,9 @@ s32 FR_FixAddSat(s32 x, s32 y) /* FR_acos — returns radians at out_radix. * Range: [0, pi]. Input is a cosine value at the given radix. * - * Uses the same 129-entry cosine table as fr_cos_bam, but in reverse: - * binary-search to find the bracketing pair, then linear-interpolate - * the fractional position between them to recover the full 14-bit - * in-quadrant BAM. This mirrors the forward path and gives matching - * precision (~1 LSB of s15.16 output). + * Uses the 129-entry sine table in reverse: binary-search the ascending + * table to find asin(|input|), then acos = pi/2 - asin (with sign handling + * for the second quadrant). */ s32 FR_acos(s32 input, u16 radix, u16 out_radix) { @@ -264,32 +744,24 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix) s32 idx, d, num, frac; s32 input_abs; - /* Work with absolute value at the caller's radix — we'll need it for - * the sqrt fast path before quantising to r15. */ + /* Work with absolute value at the caller's radix */ sign = (s16)((input < 0) ? 1 : 0); input_abs = sign ? -input : input; - /* Clamp at the caller's radix — not at r15. Near ±1.0 the r15 - * quantisation can round to 32767 even when the caller has sub-LSB - * precision that the sqrt fast path can use. */ + /* Clamp at the caller's radix */ { s32 one = (s32)1 << radix; if (input_abs >= one) - return sign ? FR_BAM2RAD(FR_BAM_HALF, out_radix) : 0; + return sign ? FR_CHRDX(FR_kPI, FR_kPREC, out_radix) : 0; } v = FR_CHRDX(input_abs, radix, FR_TRIG_PREC); /* |input| at s0.15 */ - /* Small-angle fast path: when cos(θ) is close to 1.0, the table - * has only 2-8 LSBs of gap per entry, so linear interpolation is - * very coarse. Use the identity acos(x) ≈ sqrt(2*(1-x)). - * - * Key: compute 1-x at the CALLER's radix, not r15. Near ±1.0 the - * r15 quantisation crushes many distinct inputs to the same value - * (cos(179.5°)..cos(179.9°) all round to 32767 at r15). The - * caller's higher-radix bits carry the angular information via the - * identity sin(θ) = sqrt(2(1-cos θ)) — effectively the sin trick. */ - if (v > gFR_COS_TAB_Q[7]) + /* Small-angle fast path: when cos(θ) is close to 1.0, the sine table + * has poor resolution near the top (entries close together). + * Use acos(x) ≈ sqrt(2*(1-x)) instead. Threshold: v > sin_tab[121] + * means the input is > cos(7*π/256) ≈ 0.9975. */ + if (v > gFR_SIN_TAB_Q[FR_TRIG_TABLE_SIZE - 8]) { s32 one = (s32)1 << radix; s32 one_minus_x = one - input_abs; /* 1-|x| at caller radix */ @@ -297,39 +769,31 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix) s32 rad_native = FR_sqrt(two_omx, radix); /* radians at caller radix */ s32 rad_out = FR_CHRDX(rad_native, radix, out_radix); if (sign) - rad_out = FR_BAM2RAD(FR_BAM_HALF, out_radix) - rad_out; + rad_out = FR_CHRDX(FR_kPI, FR_kPREC, out_radix) - rad_out; return rad_out; } - /* Below this point we need the sign-stripped r15 value for the - * binary search. (v was already computed from input_abs above.) */ - - /* Binary search on the cosine quadrant table. The table is - * monotonically decreasing: gFR_COS_TAB_Q[0] = 32767 (cos 0°), - * gFR_COS_TAB_Q[128] = 0 (cos 90°). + /* Binary search on the ascending sine table. + * gFR_SIN_TAB_Q[0] = 0 (sin 0°), gFR_SIN_TAB_Q[128] = 32768 (sin 90°). * - * After the search, lo is the first index where table[lo] <= v, - * so the bracketing pair is (lo-1, lo) with table[lo-1] >= v >= table[lo]. - */ + * Find the first index where table[idx] >= v. */ lo = 0; hi = FR_TRIG_TABLE_SIZE; while (lo < hi) { mid = (lo + hi) >> 1; - if (gFR_COS_TAB_Q[mid] > v) + if ((s32)gFR_SIN_TAB_Q[mid] < v) lo = mid + 1; else hi = mid; } - /* lo is now the index where table[lo] <= v. The bracketing interval - * is [lo-1, lo] (table decreasing). Clamp idx to valid range. - */ + /* lo is now the first index where table[lo] >= v. + * The bracketing interval is [lo-1, lo] with table[lo-1] < v <= table[lo]. + * This gives us the asin angle; acos = pi/2 - asin. */ idx = lo; if (idx <= 0) { - /* v >= table[0] = 32767 — essentially cos(0), already clamped above - * but guard anyway. */ idx = 0; frac = 0; } @@ -340,29 +804,27 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix) } else { - /* Linear interpolate between table[idx-1] and table[idx]. - * d = table[idx-1] - table[idx] (>= 0, cos decreasing) - * num = table[idx-1] - v (how far past table[idx-1]) - * frac = (num << FR_TRIG_FRAC_BITS) / d, in [0, FR_TRIG_FRAC_MAX) - * - * num and d are both in [0, 32767], so num << 7 fits in 22 bits. + /* Interpolate between table[idx-1] and table[idx]. + * d = table[idx] - table[idx-1] (>= 0, sin increasing) + * num = v - table[idx-1] (how far past table[idx-1]) */ - d = gFR_COS_TAB_Q[idx - 1] - gFR_COS_TAB_Q[idx]; - num = gFR_COS_TAB_Q[idx - 1] - v; + d = (s32)gFR_SIN_TAB_Q[idx] - (s32)gFR_SIN_TAB_Q[idx - 1]; + num = v - (s32)gFR_SIN_TAB_Q[idx - 1]; if (d > 0) frac = ((num << FR_TRIG_FRAC_BITS) + (d >> 1)) / d; else frac = 0; - /* Reconstruct: the angle is at index (idx-1) + frac/FRAC_MAX, - * so shift idx back by 1 for the BAM calculation below. */ idx = idx - 1; } { - u16 bam = (u16)(((u32)idx << FR_TRIG_FRAC_BITS) + (u32)frac); + /* asin_bam is the angle in first-quadrant BAM whose sin = v */ + u16 asin_bam = (u16)(((u32)idx << FR_TRIG_FRAC_BITS) + (u32)frac); + /* acos = pi/2 - asin (in BAM: quadrant - asin_bam) */ + u16 bam = (u16)(FR_TRIG_QUADRANT - asin_bam); if (sign) bam = (u16)(FR_BAM_HALF - bam); /* mirror: pi - angle */ - return FR_BAM2RAD(bam, out_radix); + return FR_CHRDX(FR_Q2RAD(bam), 14, out_radix); } } @@ -370,7 +832,7 @@ s32 FR_acos(s32 input, u16 radix, u16 out_radix) s32 FR_asin(s32 input, u16 radix, u16 out_radix) { /* asin(x) = pi/2 - acos(x) */ - s32 half_pi = FR_BAM2RAD(FR_BAM_QUADRANT, out_radix); + s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); return half_pi - FR_acos(input, radix, out_radix); } @@ -394,12 +856,12 @@ s32 FR_atan2(s32 y, s32 x, u16 out_radix) /* Axis cases — exact angles, no divide. */ if (x == 0) { - if (y > 0) return FR_BAM2RAD(FR_BAM_QUADRANT, out_radix); /* pi/2 */ - if (y < 0) return -FR_BAM2RAD(FR_BAM_QUADRANT, out_radix); /* -pi/2 */ + if (y > 0) return FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); /* pi/2 */ + if (y < 0) return -FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); /* -pi/2 */ return 0; } if (y == 0) - return (x > 0) ? 0 : FR_BAM2RAD(FR_BAM_HALF, out_radix); /* 0 or pi */ + return (x > 0) ? 0 : FR_CHRDX(FR_kPI, FR_kPREC, out_radix); /* 0 or pi */ ax = (x < 0) ? -x : x; ay = (y < 0) ? -y : y; @@ -443,7 +905,7 @@ s32 FR_atan2(s32 y, s32 x, u16 out_radix) if (cos_val < FR_ATAN2_SMALL) { /* angle ≈ pi/2 - cos_val (symmetric small-angle identity) */ - s32 half_pi = FR_BAM2RAD(FR_BAM_QUADRANT, out_radix); + s32 half_pi = FR_CHRDX(FR_kQ2RAD, FR_kPREC, out_radix); q1_angle = half_pi - FR_CHRDX(cos_val, FR_TRIG_PREC, out_radix); } else @@ -453,7 +915,7 @@ s32 FR_atan2(s32 y, s32 x, u16 out_radix) /* Apply quadrant from signs of x and y. * q1_angle is always positive [0..pi/2]. */ { - s32 pi = FR_BAM2RAD(FR_BAM_HALF, out_radix); + s32 pi = FR_CHRDX(FR_kPI, FR_kPREC, out_radix); if (x > 0) return (y > 0) ? q1_angle : -q1_angle; /* x < 0: mirror across y-axis */ @@ -658,11 +1120,13 @@ s32 FR_ln(s32 input, u16 radix, u16 output_radix) return FR_MULK28(r, FR_krLOG2E_28); } +#ifndef FR_LEAN s32 FR_log10(s32 input, u16 radix, u16 output_radix) { s32 r = FR_log2(input, radix, output_radix); return FR_MULK28(r, FR_krLOG2_10_28); } +#endif #ifndef FR_NO_PRINT /*************************************** @@ -1016,6 +1480,7 @@ s32 FR_sqrt(s32 input, u16 radix) * * Side effects: none. Pure function. */ +#ifndef FR_LEAN s32 FR_hypot(s32 x, s32 y, u16 radix) { uint64_t xx = (uint64_t)((int64_t)x * (int64_t)x); @@ -1023,6 +1488,7 @@ s32 FR_hypot(s32 x, s32 y, u16 radix) (void)radix; /* the 2*radix in xx+yy cancels with isqrt's halving */ return (s32)fr_isqrt64(xx + yy); } +#endif /*======================================================= * FR_hypot_fast8 — 8-segment piecewise-linear magnitude approximation. diff --git a/src/FR_math.h b/src/FR_math.h index 6eff284..a2db262 100644 --- a/src/FR_math.h +++ b/src/FR_math.h @@ -32,14 +32,18 @@ #ifndef __FR_Math_h__ #define __FR_Math_h__ -#define FR_MATH_VERSION "2.0.7" -#define FR_MATH_VERSION_HEX 0x020007 /* major << 16 | minor << 8 | patch */ +#define FR_MATH_VERSION "2.0.8" +#define FR_MATH_VERSION_HEX 0x020008 /* major << 16 | minor << 8 | patch */ #ifdef FR_CORE_ONLY #define FR_NO_PRINT #define FR_NO_WAVES #endif +#ifdef FR_LEAN +#define FR_NO_WAVES +#endif + #ifdef __cplusplus extern "C" { @@ -49,21 +53,14 @@ extern "C" #include "FR_defs.h" #endif -/* Quick Note on MACRO param wrapping: - * All macro inputs are wrapped in paranthesis in this code. - * eg: #define MACRO_X_SQUARED(x) ((x)*(x)) //<<-- note internal paranthesis - * this is done because macros use true source substitution in C/C++ so a if - * a macro internally uses many operators of mixed precedence e.g. >> and * together - * undesired behavior can result if the parameter "passed" in the the macro is a - * a complex contruct e.g. instead of being a value or single variable is a - * something like 3+4*5 --> all of this would gets substituted in to the MACRO - * expression and parans eliminate chances for odd behavior. - * For example: - * MACRO_X_SQUARED_BAD(x) (x*x) - * will expand this way: - * 3+4*5*3+4*5 ==> 3+60+20 == 83 // due to precedence operations whereas - * MACRO_X_SQUARED(x) ((x)*(x)) - * (3+4*5)*(3+4*5) ==> (3+20)*(3+20) == (23)*(23) == 529 +/* Quick note on macro parameter wrapping: + * Arguments are parenthesized in expansions, e.g. + * #define MACRO_X_SQUARED(x) ((x)*(x)) // inner parens around each x + * Macros substitute text as-is. If a parameter is an expression like 3+4*5 + * and the body mixes operators without extra parentheses, precedence errors + * follow. Parenthesize parameters (and fragile subexpressions) in the macro body. + * Example: MACRO_X_SQUARED_BAD(x) (x*x) -> 3+4*5*3+4*5 == 83 (wrong). + * MACRO_X_SQUARED(x) ((x)*(x)) -> (3+4*5)*(3+4*5) == 529 (right). */ /*absolute value for integer and fixed radix types*/ @@ -258,7 +255,7 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { /*================================================ * Constants used in Trig tables, definitions * - * FR_TRIG_PREC — internal table precision (s0.15, kept for table indexing) + * FR_TRIG_PREC — internal table precision (u0.15, sine table) * FR_TRIG_OUT_PREC — output precision of sin/cos/tan (s15.16 since v2.0.1) * FR_TRIG_ONE — exact 1.0 in output format (1 << 16 = 65536) * @@ -270,8 +267,8 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { #define FR_TRIG_OUT_PREC (16) #define FR_TRIG_MASK ((1 << (FR_TRIG_PREC)) - 1) #define FR_TRIG_ONE (1L << FR_TRIG_OUT_PREC) /* 65536 = 1.0 */ -#define FR_TRIG_MAXVAL ((s32)0x7fffffff) /* tan saturation */ -#define FR_TRIG_MINVAL (-FR_TRIG_MASK) +#define FR_TRIG_MAXVAL ((s32)0x7fffffff) /* tan saturation max */ +#define FR_TRIG_MINVAL (-FR_TRIG_MAXVAL) /* tan saturation min */ /* Bit Shift Scaling macros. Useful on some platforms with poor MUL performance. * Also can be useful if you need to scale numbers with @@ -304,32 +301,85 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { /* scale by log2(10) 3.32192809489 used for converting pow2() to pow10 */ #define FR_SLOG2_10(x) (((x) << 1) + (x) + ((x) >> 2) + ((x) >> 4) + ((x) >> 7) + ((x) >> 10) + ((x) >> 11) + ((x) >> 13)) -/* TRIG Conversion macros - * Convert degrees <--> radians <--> quadrants <--> degrees - * no multiply (may reduce chances of overflow in certain circumstances) - * works on all int types and radixes (pure ints will have trunc err) - * radians = 2*pi per revolution - * degrees = 360 per revolution - * quadrants = 4 per revolution - * freq = 1 per revolution - */ -/* FR_DEG2RAD(x): multiply by pi/180 ≈ 0.017453 using shifts only. - * Worst-case relative error: ~1.6e-4 (acceptable for embedded use; if you - * need better precision, multiply by FR_kDEG2RAD and shift down by FR_kPREC). - * Side-effect note: x is referenced 3 times, so do not pass an expression - * with side effects. +/* Shift-only angular conversion macros + * + * All are pure constant multipliers expressed as shifts — no multiply, no + * divide, no 64-bit intermediates, no accumulators. Work at any radix: if + * your input is degrees at radix 8, the output is the target unit at radix 8. + * The caller shifts as needed. + * + * Angular units: + * degrees = 360 per revolution + * radians = 2*pi per revolution + * BAM = 65536 per revolution (Binary Angular Measure, u16) + * quadrants = 4 per revolution (= BAM >> 14) + * + * Side-effect note: x is referenced multiple times in each macro — do not + * pass expressions with side effects. */ -#define FR_DEG2RAD(x) (((x) >> 6) + ((x) >> 9) - ((x) >> 13)) -/* FR_RAD2DEG(x): multiply by 180/pi ≈ 57.295780 using shifts only. - * Worst-case relative error: ~2.1e-6. - * Side-effect note: x is referenced 7 times. - */ +/* FR_DEG2RAD(x): multiply by pi/180 ≈ 0.017453 (5 terms, ~17 bits) */ +#define FR_DEG2RAD(x) (((x) >> 6) + ((x) >> 9) - ((x) >> 13) - ((x) >> 19) - ((x) >> 20)) + +/* FR_RAD2DEG(x): multiply by 180/pi ≈ 57.29578 (7 terms, ~19 bits) */ #define FR_RAD2DEG(x) (((x) << 6) - ((x) << 3) + (x) + ((x) >> 2) + (((x) >> 4) - ((x) >> 6)) - ((x) >> 10)) +/* FR_DEG2BAM(x): multiply by 65536/360 ≈ 182.0449 (7 terms, ~18 bits). + * Intermediate terms overflow s32 when |x| > ~256 deg at s15.16 (x<<7 term), + * but the overflow is harmless when the result is truncated to u16 BAM + * (two's complement wrapping preserves modular correctness). + * For full-precision s32 BAM (sub-BAM interpolation), use fr_deg_to_bam(). */ +#define FR_DEG2BAM(x) (((x)<<7)+((x)<<6)-((x)<<3)-((x)<<1)+((x)>>5)+((x)>>6)-((x)>>9)) + +/* FR_BAM2DEG(x): multiply by 360/65536 = 0.00549316 (4 terms, exact) */ +#define FR_BAM2DEG(x) (((x)>>8)+((x)>>9)-((x)>>12)-((x)>>13)) + +/* FR_RAD2BAM(x): multiply by 65536/(2*pi) ≈ 10430.378 (7 terms, ~21 bits). + * CAUTION: overflows s32 when |x| > ~4 rad at s15.16 (x<<13 term). + * For safe conversion at any radix, use fr_rad_to_bam() instead. + * #define FR_RAD2BAM(x) (((x)<<13)+((x)<<11)+((x)<<7)+((x)<<6)-((x)<<1)+((x)>>1)-((x)>>3)) */ +#define FR_RAD2BAM(x) (((x)<<13)+((x)<<11)+((x)<<7)+((x)<<6)-((x)<<1)+((x)>>1)-((x)>>3)+((x)>>8)-((x)>>11)-((x)>>14)) +/* ── Overflow-safe rad/deg to BAM conversion functions ───────────── + * + * These replace the FR_RAD2BAM / FR_DEG2BAM macros for callers that + * need the full ±2*pi or ±360° range at any radix. + * + * Strategy: normalize input to radix 16, conditionally reduce into + * a safe zone, apply the full-precision shift-only multiply, then + * extract the u16 BAM. No precision loss from halving/quartering. + * + * fr_rad_to_bam: reduce to [-pi, pi], reordered terms. ±2*pi safe. + * fr_deg_to_bam: reduce to [-90, 90) + quadrant offset. ±360° safe. + */ + +/* Pi constants at any radix: FR_PI(r) = round(pi * 2^r), etc. + * Compiler evaluates at compile time when r is a constant. + * Max safe radix: FR_PI r<=29, FR_TWO_PI r<=28, FR_HALF_PI r<=30. */ +#define FR_PI(r) ((s32)(3.14159265358979323846 * (1LL << (r)) + 0.5)) +#define FR_TWO_PI(r) ((s32)(6.28318530717958647692 * (1LL << (r)) + 0.5)) +#define FR_HALF_PI(r) ((s32)(1.57079632679489661923 * (1LL << (r)) + 0.5)) +#define FR_THREE_HALF_PI(r) ((s32)(4.71238898038468985769 * (1LL << (r)) + 0.5)) + +/* Convenience aliases at radix 16 */ +#define FR_PI_R16 FR_PI(16) +#define FR_TWO_PI_R16 FR_TWO_PI(16) + +/* Degree constants at radix 16 (exact — no truncation) */ +#define FR_D90_R16 ((s32)90 << 16) +#define FR_D180_R16 ((s32)180 << 16) +#define FR_D360_R16 ((s32)360 << 16) + + u16 fr_rad_to_bam(s32 rad, u16 radix); +#ifndef FR_LEAN + u16 fr_deg_to_bam(s32 deg, u16 radix); +#endif + +/* FR_BAM2RAD(x): multiply by 2*pi/65536 ≈ 0.0000959 (5 terms, ~18 bits) */ +#define FR_BAM2RAD(x) (((x)>>13)-((x)>>15)+((x)>>18)+((x)>>21)+((x)>>25)) + +/* Legacy quadrant macros (quadrants = BAM >> 14) */ #define FR_RAD2Q(x) (((x) >> 1) + ((x) >> 3) + ((x) >> 7) + ((x) >> 8) - ((x) >> 14)) #define FR_Q2RAD(x) ((x) + ((x) >> 1) + ((x) >> 4) + ((x) >> 7) + ((x) >> 11)) - #define FR_DEG2Q(x) (((x) >> 6) - ((x) >> 8) - ((x) >> 11) - ((x) >> 13)) #define FR_Q2DEG(x) (((x) << 6) + ((x) << 4) + ((x) << 3) + ((x) << 1)) @@ -347,44 +397,12 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { * - The top 2 bits select the quadrant (no `% 360` modulo needed). * - The next 7 bits index the 128-entry quadrant table directly. * - The bottom 7 bits give linear-interpolation precision. - * - * All BAM macros are *macros* (not functions) so they evaluate inline and - * cost nothing if you don't call them. Side-effect note: each macro - * references its argument multiple times — do not pass an expression with - * side effects. */ #define FR_BAM_BITS (16) #define FR_BAM_FULL (1L << FR_BAM_BITS) /* 65536 */ #define FR_BAM_QUADRANT (FR_BAM_FULL >> 2) /* 16384 */ #define FR_BAM_HALF (FR_BAM_FULL >> 1) /* 32768 */ -/* Convert degrees -> BAM. Exact formula: deg * 65536 / 360. - * Computed in s32; for s16-range deg the intermediate (deg << 16) fits. - * The cast to u16 wraps modulo full circle, which is mathematically correct. - * Side-effect note: deg is referenced twice for sign-aware rounding. - * - * Worst-case error: <= 0.5 LSB BAM (~0.0028 deg) per degree. No accumulation - * across full circles. - */ -#define FR_DEG2BAM(deg) ((u16)((((s32)(deg) << 16) + ((deg) >= 0 ? 180 : -180)) / 360)) - -/* Convert BAM -> degrees. bam * (360 / 65536) ≈ bam * (45/8192). - * Truncated; result is integer degrees. - */ -#define FR_BAM2DEG(bam) ((s16)(((s32)(u16)(bam) * 45) >> 13)) - -/* Convert radians (at given radix) -> BAM. rad * (65536 / (2*pi)) ≈ rad * 10430.378 - * For radix-16 input: ((rad * 10430) >> 16). Approximated; for high accuracy - * combine with FR_kRAD2Q multiplier. - */ -#define FR_RAD2BAM(rad, radix) ((u16)(((s32)(rad) * 10430L) >> (radix))) - -/* Convert BAM -> radians at the requested output radix. - * Derivation: rad = bam * 2π / 65536. At output radix r: bam * 2π * 2^r / 2^16 - * = bam * (2π * 2^10) / 2^(26 - r) = bam * 6434 >> (26 - r). - */ -#define FR_BAM2RAD(bam, radix) ((s32)(((s32)(u16)(bam) * 6434L) >> (26 - (radix)))) - /*=============================================== * Radian-native and BAM-native trig (recommended) * @@ -397,39 +415,59 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { * fr_cos(rad, radix) — cos of radians at radix, s15.16 result * fr_sin(rad, radix) — sin of radians at radix, s15.16 result * fr_tan(rad, radix) — tan of radians at radix, s15.16 result - * fr_cos_deg(deg) — cos of integer degrees, s15.16 result - * fr_sin_deg(deg) — sin of integer degrees, s15.16 result + * fr_cos_deg(deg, radix) — cos of fixed-radix degrees, s15.16 result + * fr_sin_deg(deg, radix) — sin of fixed-radix degrees, s15.16 result + * fr_tan_deg(deg, radix) — tan of fixed-radix degrees, s15.16 result * * All go through the same 129-entry quadrant table with linear interpolation. * Worst-case error: ~2 LSB in s15.16 (~3e-5 absolute), except at the four * cardinal angles where the result is exact. + * + * The radian and degree wrappers (fr_sin, fr_cos, fr_tan, etc.) range-reduce + * their input, convert to u16 BAM, and call the BAM-native functions. Small- + * angle bypasses at the zero crossings (sin≈0, cos≈0, tan≈0) use the linear + * approximation sin(δ)≈δ to avoid BAM quantization error where it matters most. */ s32 fr_cos_bam(u16 bam); s32 fr_sin_bam(u16 bam); +#ifndef FR_LEAN + s32 fr_tan_bam(u16 bam); +#endif s32 fr_cos(s32 rad, u16 radix); s32 fr_sin(s32 rad, u16 radix); s32 fr_tan(s32 rad, u16 radix); -#define fr_cos_deg(deg) fr_cos_bam(FR_DEG2BAM(deg)) -#define fr_sin_deg(deg) fr_sin_bam(FR_DEG2BAM(deg)) +/* Integer degrees -> BAM using division (exact at all multiples of 45 deg). */ +#define FR_DEG2BAM_I(deg) ((u16)((((s32)(deg) << 16) + ((deg) >= 0 ? 180 : -180)) / 360)) +/* Legacy single-arg integer-degree macros — use FR_CosI / FR_SinI instead */ +/* #define fr_cos_deg(deg) fr_cos_bam(FR_DEG2BAM_I(deg)) — removed, name reused for 2-arg function */ +/* #define fr_sin_deg(deg) fr_sin_bam(FR_DEG2BAM_I(deg)) — removed, name reused for 2-arg function */ + +#ifndef FR_LEAN /*=============================================== - * Integer-degree trig API (thin wrappers over the BAM-native path) - * - * FR_CosI(deg) — cos of integer degrees, s15.16 result - * FR_SinI(deg) — sin of integer degrees, s15.16 result - * FR_TanI(deg) — tan of integer degrees, s15.16 result - * FR_Cos(deg, radix) — cos of fixed-radix degrees, s15.16 result - * FR_Sin(deg, radix) — sin of fixed-radix degrees, s15.16 result - * FR_Tan(deg, radix) — tan of fixed-radix degrees, s15.16 result + * Degree-input trig API + * + * FR_CosI(deg) — cos of integer degrees, s15.16 result + * FR_SinI(deg) — sin of integer degrees, s15.16 result + * FR_TanI(deg) — tan of integer degrees, s15.16 result + * fr_cos_deg(deg, radix) — cos of fixed-radix degrees, s15.16 result + * fr_sin_deg(deg, radix) — sin of fixed-radix degrees, s15.16 result + * fr_tan_deg(deg, radix) — tan of fixed-radix degrees, s15.16 result */ -#define FR_CosI(deg) fr_cos_bam(FR_DEG2BAM(deg)) -#define FR_SinI(deg) fr_sin_bam(FR_DEG2BAM(deg)) +#define FR_CosI(deg) fr_cos_bam(FR_DEG2BAM_I(deg)) +#define FR_SinI(deg) fr_sin_bam(FR_DEG2BAM_I(deg)) + + s32 fr_cos_deg(s32 deg, u16 radix); + s32 fr_sin_deg(s32 deg, u16 radix); + s32 FR_TanI(s32 deg); + s32 fr_tan_deg(s32 deg, u16 radix); - s32 FR_Cos(s16 deg, u16 radix); - s32 FR_Sin(s16 deg, u16 radix); - s32 FR_TanI(s16 deg); - s32 FR_Tan(s16 deg, u16 radix); + /* Legacy macros — use fr_sin_deg/fr_cos_deg/fr_tan_deg in new code */ + #define FR_Sin fr_sin_deg + #define FR_Cos fr_cos_deg + #define FR_Tan fr_tan_deg +#endif /* FR_LEAN */ /* Inverse trig — output in radians at caller-specified radix (s32). * FR_atan2 returns radians at radix 16 (s15.16). @@ -446,7 +484,9 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { s32 FR_log2(s32 input, u16 radix, u16 output_radix); s32 FR_ln(s32 input, u16 radix, u16 output_radix); +#ifndef FR_LEAN s32 FR_log10(s32 input, u16 radix, u16 output_radix); +#endif /* Power */ s32 FR_pow2(s32 input, u16 radix); @@ -494,7 +534,9 @@ static inline s32 FR_div_rnd(s64 num, s32 den) { * can check `result == FR_DOMAIN_ERROR` to detect domain errors. */ s32 FR_sqrt(s32 input, u16 radix); +#ifndef FR_LEAN s32 FR_hypot(s32 x, s32 y, u16 radix); +#endif /* Fast approximate magnitude — shift-only, no multiply, no 64-bit. * Based on piecewise-linear approximation of sqrt(x*x + y*y). diff --git a/src/FR_math_2D.cpp b/src/FR_math_2D.cpp index c9025b3..b45ca75 100644 --- a/src/FR_math_2D.cpp +++ b/src/FR_math_2D.cpp @@ -5,7 +5,7 @@ * * @copy Copyright (C) <2001-2026> * @author M A Chatterjee - * @version 2.0.7 M. A. Chatterjee, cleaned up naming + * @version 2.0.8 M. A. Chatterjee, cleaned up naming * * This file contains integer math settable fixed point radix math routines for * use on systems in which floating point is not desired or unavailable. diff --git a/src/FR_math_2D.h b/src/FR_math_2D.h index 8f16330..3eaf7d3 100644 --- a/src/FR_math_2D.h +++ b/src/FR_math_2D.h @@ -3,7 +3,7 @@ * * @copy Copyright (C) <2001-2026> * @author M A Chatterjee - * @version 2.0.7 M. A. Chatterjee, cleaned up naming + * @version 2.0.8 M. A. Chatterjee, cleaned up naming * * This file contains integer math settable fixed point radix math routines for * use on systems in which floating point is not desired or unavailable. diff --git a/src/FR_trig_table.h b/src/FR_trig_table.h deleted file mode 100644 index 03a34cd..0000000 --- a/src/FR_trig_table.h +++ /dev/null @@ -1,71 +0,0 @@ -/** - * @file FR_trig_table.h - 129-entry quadrant cosine table for FR_Math 2.0 - * - * This table covers one quadrant [0, pi/2] inclusive in 128 intervals (so - * 129 entries). Indexed by a 7-bit BAM (binary angular measure) sub-index. - * Used by fr_cos_bam / fr_sin_bam in FR_math.c. - * - * Output format: s0.15 (signed, 15 fractional bits). So - * gFR_COS_TAB_Q[0] = round(cos(0) * 32767) = 32767 - * gFR_COS_TAB_Q[64] = round(cos(pi/4) * 32767) ~ 23170 - * gFR_COS_TAB_Q[128] = round(cos(pi/2) * 32767) = 0 - * - * Generated by tools/coef-gen.py — do not hand-edit. - * - * @copy Copyright (C) <2001-2026> - * @author M A Chatterjee - * - * Same zlib license as the rest of the library. - */ -#ifndef __FR_TRIG_TABLE_H__ -#define __FR_TRIG_TABLE_H__ - -#ifdef __cplusplus -extern "C" { -#endif - -#define FR_TRIG_TABLE_BITS (7) /* log2(intervals) */ -#define FR_TRIG_TABLE_SIZE ((1 << FR_TRIG_TABLE_BITS) + 1) /* entries = intervals + 1 */ - -/* Derived constants for fr_cos_bam / fr_sin_bam. - * - * The BAM has 16 bits total: 2 top bits for quadrant, 14 bits in-quadrant. - * The in-quadrant value is split into (FR_TRIG_TABLE_BITS) table-index bits - * and (FR_TRIG_FRAC_BITS) interpolation-fraction bits, so - * FR_TRIG_TABLE_BITS + FR_TRIG_FRAC_BITS = 14. - * - * Changing FR_TRIG_TABLE_BITS (and regenerating the table with coef-gen.py) - * is the single knob for ROM-vs-precision trade-off. Every other constant - * below derives from it automatically. - */ -#define FR_TRIG_FRAC_BITS (14 - FR_TRIG_TABLE_BITS) -#define FR_TRIG_FRAC_MAX (1 << FR_TRIG_FRAC_BITS) -#define FR_TRIG_FRAC_MASK (FR_TRIG_FRAC_MAX - 1) -#define FR_TRIG_FRAC_HALF (FR_TRIG_FRAC_MAX >> 1) /* rounding bias */ -#define FR_TRIG_QUADRANT (1 << 14) /* in-quadrant span */ - -static const short gFR_COS_TAB_Q[FR_TRIG_TABLE_SIZE] = { - 32767, 32765, 32757, 32745, 32728, 32705, 32678, 32646, - 32609, 32567, 32521, 32469, 32412, 32351, 32285, 32213, - 32137, 32057, 31971, 31880, 31785, 31685, 31580, 31470, - 31356, 31237, 31113, 30985, 30852, 30714, 30571, 30424, - 30273, 30117, 29956, 29791, 29621, 29447, 29268, 29085, - 28898, 28706, 28510, 28310, 28105, 27896, 27683, 27466, - 27245, 27019, 26790, 26556, 26319, 26077, 25832, 25582, - 25329, 25072, 24811, 24547, 24279, 24007, 23731, 23452, - 23170, 22884, 22594, 22301, 22005, 21705, 21403, 21096, - 20787, 20475, 20159, 19841, 19519, 19195, 18868, 18537, - 18204, 17869, 17530, 17189, 16846, 16499, 16151, 15800, - 15446, 15090, 14732, 14372, 14010, 13645, 13279, 12910, - 12539, 12167, 11793, 11417, 11039, 10659, 10278, 9896, - 9512, 9126, 8739, 8351, 7962, 7571, 7179, 6786, - 6393, 5998, 5602, 5205, 4808, 4410, 4011, 3612, - 3212, 2811, 2410, 2009, 1608, 1206, 804, 402, - 0 -}; - -#ifdef __cplusplus -} // extern "C" -#endif - -#endif /* __FR_TRIG_TABLE_H__ */ diff --git a/tests/fr_math_test.c b/tests/fr_math_test.c index 4c095cc..346b840 100644 --- a/tests/fr_math_test.c +++ b/tests/fr_math_test.c @@ -93,5 +93,5 @@ int main() else printf("tests failed.\n"); - return result; /* remember the value 0 is considered passing in a travis-ci sense */ + return result; /* remember the value 0 is considered passing in a ci sense */ } \ No newline at end of file diff --git a/tests/test_full_coverage.c b/tests/test_full_coverage.c index 0dfd248..36c00f0 100644 --- a/tests/test_full_coverage.c +++ b/tests/test_full_coverage.c @@ -188,7 +188,7 @@ int test_div() { int test_trig_complete() { s16 result; s32 result32; - + /* Test CosI with all quadrants and edge cases */ result = FR_CosI(0); result = FR_CosI(45); @@ -199,51 +199,113 @@ int test_trig_complete() { result = FR_CosI(270); result = FR_CosI(315); result = FR_CosI(360); - + /* Test angles > 180 to hit the branch */ result = FR_CosI(200); /* > 180, will subtract 360 */ result = FR_CosI(350); /* > 180, will subtract 360 */ - + /* Test angles < -180 to hit that branch */ result = FR_CosI(-200); /* < -180, will add 360 */ result = FR_CosI(-350); /* < -180, will add 360 */ - + /* Test SinI */ result = FR_SinI(0); result = FR_SinI(90); result = FR_SinI(180); result = FR_SinI(270); - + /* Test FR_Cos with radix (interpolated) */ result = FR_Cos(45, 8); result = FR_Cos(90, 8); result = FR_Cos(180, 8); - + /* Test FR_Sin with radix */ result = FR_Sin(45, 8); result = FR_Sin(90, 8); - + /* Test TanI with all special cases */ result32 = FR_TanI(0); + if (result32 != 0) return TEST_FAIL; /* tan(0°) = 0 */ result32 = FR_TanI(45); - result32 = FR_TanI(90); /* Special case: returns max */ + if (result32 != 65536) return TEST_FAIL; /* tan(45°) = 1.0 = 65536 */ + result32 = FR_TanI(90); + if (result32 != FR_TRIG_MAXVAL) return TEST_FAIL; /* pole: +max */ result32 = FR_TanI(135); + if (result32 != -65536) return TEST_FAIL; /* tan(135°) = -1.0 */ result32 = FR_TanI(180); - result32 = FR_TanI(270); /* Special case: returns -max */ - result32 = FR_TanI(-45); /* Negative angle */ - result32 = FR_TanI(-90); /* Negative 90 */ + if (result32 != 0) return TEST_FAIL; /* tan(180°) = 0 */ + result32 = FR_TanI(270); + if (result32 != FR_TRIG_MAXVAL) return TEST_FAIL; /* pole: +max (positive deg) */ + result32 = FR_TanI(-45); + if (result32 != -65536) return TEST_FAIL; /* tan(-45°) = -1.0 */ + result32 = FR_TanI(-90); + if (result32 != -FR_TRIG_MAXVAL) return TEST_FAIL; /* pole: -max */ result32 = FR_TanI(200); /* > 180 */ result32 = FR_TanI(-200); /* < -180 */ - + /* Test FR_Tan with radix */ result32 = FR_Tan(45, 8); result32 = FR_Tan(30, 8); - + (void)result; (void)result32; return TEST_PASS; } +/* Test fr_tan_bam BAM-native tangent */ +int test_tan_bam() { + s32 result; + + /* Exact zeros: 0° and 180° */ + result = fr_tan_bam(0); /* 0° */ + if (result != 0) return TEST_FAIL; + result = fr_tan_bam(0x8000); /* 180° */ + if (result != 0) return TEST_FAIL; + + /* Exact poles: 90° and 270° */ + result = fr_tan_bam(0x4000); /* 90° = +pole */ + if (result != FR_TRIG_MAXVAL) return TEST_FAIL; + result = fr_tan_bam(0xC000); /* 270° = -pole */ + if (result != -FR_TRIG_MAXVAL) return TEST_FAIL; + + /* 45° = 0x2000: tan(45°) = 1.0 = 65536 in s15.16 */ + result = fr_tan_bam(0x2000); + if (result != 65536) return TEST_FAIL; + + /* 135° = 0x6000: tan(135°) = -1.0 */ + result = fr_tan_bam(0x6000); + if (result != -65536) return TEST_FAIL; + + /* 225° = 0xA000: tan(225°) = 1.0 (same as 45°) */ + result = fr_tan_bam(0xA000); + if (result != 65536) return TEST_FAIL; + + /* 315° = 0xE000: tan(315°) = -1.0 */ + result = fr_tan_bam(0xE000); + if (result != -65536) return TEST_FAIL; + + /* 30° ≈ BAM 5461: tan(30°) = 1/sqrt(3) ≈ 0.57735 → 37837 in s15.16 + * Allow ±50 LSB for table interpolation error */ + result = fr_tan_bam(5461); + if (result < 37700 || result > 37950) return TEST_FAIL; + + /* 60° ≈ BAM 10923: tan(60°) = sqrt(3) ≈ 1.73205 → 113512 in s15.16 + * This exercises the second-octant (reciprocal) path. Allow ±200 LSB. */ + result = fr_tan_bam(10923); + if (result < 113200 || result > 113800) return TEST_FAIL; + + /* Near-pole: 89° ≈ BAM 16202: tan(89°) ≈ 57.29 → huge. + * Just verify it's large and positive. */ + result = fr_tan_bam(16202); + if (result < 3000000) return TEST_FAIL; /* > 45.8 in s15.16 */ + + /* Near-pole: 91° ≈ BAM 16566: tan(91°) ≈ -57.29 → large negative */ + result = fr_tan_bam(16566); + if (result > -3000000) return TEST_FAIL; + + return TEST_PASS; +} + /* Test inverse trig functions */ int test_inverse_trig() { s32 result, input; @@ -748,8 +810,8 @@ int test_edge_branches() { * cos==0 and we hit the saturation return. */ r32 = FR_Tan(90, 0); /* bam=16384 (sin>0) */ if (r32 != FR_TRIG_MAXVAL) return TEST_FAIL; - r32 = FR_Tan(270, 0); /* bam=49152 (sin<0) */ - if (r32 != -FR_TRIG_MAXVAL) return TEST_FAIL; + r32 = FR_Tan(270, 0); /* pole: positive deg → +MAXVAL */ + if (r32 != FR_TRIG_MAXVAL) return TEST_FAIL; /* FR_atan2 now returns radians at out_radix. * At radix 16: pi/2 ≈ 102944, pi ≈ 205887. @@ -1031,6 +1093,7 @@ int main() { printf("\nTrigonometry (Complete):\n"); RUN_TEST(test_trig_complete); + RUN_TEST(test_tan_bam); RUN_TEST(test_inverse_trig); printf("\nLogarithms & Powers (Complete):\n"); diff --git a/tests/test_tdd.cpp b/tests/test_tdd.cpp index 5a70a0a..4bff9b2 100644 --- a/tests/test_tdd.cpp +++ b/tests/test_tdd.cpp @@ -58,7 +58,7 @@ * ============================================================ */ static inline double frd(s32 x, int radix) { - return (double)x / (double)(1L << radix); + return (double)x / ldexp(1.0, radix); } typedef struct { @@ -73,13 +73,19 @@ typedef struct { double worst_pct_input; /* input that produced max pct error */ double worst_pct_actual; double worst_pct_expected; + /* Clamped-denominator relative error: denom = max(|expected|, 1% of full_scale) */ + double max_pct_err_clamped; + double sum_pct_err_clamped; + double worst_clamped_input; + double worst_clamped_actual; + double worst_clamped_expected; } stats_t; static void stats_reset(stats_t *s) { memset(s, 0, sizeof(*s)); } -static void stats_add(stats_t *s, double in, double actual, double expected) { +static void stats_add(stats_t *s, double in, double actual, double expected, double full_scale) { double e = actual - expected; if (e < 0) e = -e; if (s->n == 0 || e > s->max_abs_err) { @@ -89,8 +95,7 @@ static void stats_add(stats_t *s, double in, double actual, double expected) { s->worst_expected = expected; } s->sum_abs_err += e; - /* Skip percent error when expected ≈ 0 to avoid division artifacts */ - double pct = (fabs(expected) > 0.01) ? (e / fabs(expected)) * 100.0 : 0.0; + double pct = (expected != 0.0) ? (e / fabs(expected)) * 100.0 : (e != 0.0 ? 100.0 : 0.0); if (pct > s->max_pct_err) { s->max_pct_err = pct; s->worst_pct_input = in; @@ -98,6 +103,17 @@ static void stats_add(stats_t *s, double in, double actual, double expected) { s->worst_pct_expected = expected; } s->sum_pct_err += pct; + /* Clamped-denominator relative error: floor = 1% of full_scale */ + double floor_val = 0.01 * full_scale; + double denom = fabs(expected) > floor_val ? fabs(expected) : floor_val; + double pct_clamped = (denom > 0.0) ? (e / denom) * 100.0 : 0.0; + if (pct_clamped > s->max_pct_err_clamped) { + s->max_pct_err_clamped = pct_clamped; + s->worst_clamped_input = in; + s->worst_clamped_actual = actual; + s->worst_clamped_expected = expected; + } + s->sum_pct_err_clamped += pct_clamped; s->n++; } @@ -105,8 +121,28 @@ static double stats_mean(const stats_t *s) { return s->n ? s->sum_abs_err / s->n : 0.0; } -static double stats_mean_pct(const stats_t *s) { - return s->n ? s->sum_pct_err / s->n : 0.0; +static double stats_mean_pct_clamped(const stats_t *s) { + return s->n ? s->sum_pct_err_clamped / s->n : 0.0; +} + +/* Quantize a double to s15.16 resolution (same grid as library output). */ +static inline double q16(double x) { + return floor(x * 65536.0 + 0.5) / 65536.0; +} + +/* Round-to-nearest float→fixed conversion (not truncation). */ +static inline s32 tofix(double v, int p) { + return (s32)floor(ldexp(v, p) + 0.5); +} + +/* Reference value for tan: libm tan() clamped to ±maxint as s15.16 double. */ +static const double TAN_CLAMP = (double)0x7fffffff / 65536.0; + +static double tan_ref(double rad) { + double t = tan(rad); + if (t > TAN_CLAMP) return TAN_CLAMP; + if (t < -TAN_CLAMP) return -TAN_CLAMP; + return t; } /* Set by FR_SHOWPEAK env var — adds a "Peak at" column to the accuracy table */ @@ -115,9 +151,9 @@ static int g_showpeak = 0; /* Print one accuracy table row, optionally with peak-error input */ static void acc_row(const char *name, const stats_t *s, const char *note) { printf("| %s | %.4f | %.4f | %s", - name, s->max_pct_err, stats_mean_pct(s), note); + name, s->max_pct_err_clamped, stats_mean_pct_clamped(s), note); if (g_showpeak) - printf(" | %.4g", s->worst_pct_input); + printf(" | %.4g", s->worst_clamped_input); printf(" |\n"); } @@ -633,8 +669,8 @@ static void section_arithmetic(void) { }; for (int i = 0; i < (int)(sizeof(div_cases)/sizeof(div_cases[0])); i++) { int r = div_cases[i].r; - s32 xfp = (s32)(div_cases[i].xd * (1L << r)); - s32 yfp = (s32)(div_cases[i].yd * (1L << r)); + s32 xfp = tofix(div_cases[i].xd, r); + s32 yfp = tofix(div_cases[i].yd, r); double expected = div_cases[i].xd / div_cases[i].yd; s32 d64 = FR_DIV(xfp, r, yfp, r); s32 d32 = FR_DIV32(xfp, r, yfp, r); @@ -667,8 +703,8 @@ static void section_trig_int(void) { double exp_sin = sin(deg * M_PI / 180.0); double act_cos = frd(FR_CosI((s16)deg), FR_TRIG_OUT_PREC); double act_sin = frd(FR_SinI((s16)deg), FR_TRIG_OUT_PREC); - stats_add(&cos_stats, deg, act_cos, exp_cos); - stats_add(&sin_stats, deg, act_sin, exp_sin); + stats_add(&cos_stats, deg, act_cos, exp_cos, 1.0); + stats_add(&sin_stats, deg, act_sin, exp_sin, 1.0); } table_header_stats(); @@ -684,7 +720,7 @@ static void section_trig_int(void) { if (deg % 90 == 0 && deg != 0) { tan_skipped++; continue; } double exp_tan = tan(deg * M_PI / 180.0); double act_tan = frd(FR_TanI((s16)deg), FR_TRIG_OUT_PREC); - stats_add(&tan_stats, deg, act_tan, exp_tan); + stats_add(&tan_stats, deg, act_tan, exp_tan, TAN_CLAMP); } table_header_stats(); table_row_stats("FR_TanI [-89..89]", &tan_stats); @@ -722,8 +758,8 @@ static void section_trig_frac(void) { double exp_s = sin(deg_d * M_PI / 180.0); double act_c = frd(FR_Cos(deg_fr, 8), FR_TRIG_OUT_PREC); double act_s = frd(FR_Sin(deg_fr, 8), FR_TRIG_OUT_PREC); - stats_add(&cos_f, deg_d, act_c, exp_c); - stats_add(&sin_f, deg_d, act_s, exp_s); + stats_add(&cos_f, deg_d, act_c, exp_c, 1.0); + stats_add(&sin_f, deg_d, act_s, exp_s, 1.0); } table_header_stats(); table_row_stats("FR_Cos r8 0.25 step", &cos_f); @@ -759,10 +795,11 @@ static void section_inverse_trig(void) { /* radix 15 inputs, output radians at radix 16, 200 samples */ for (int i = -200; i <= 200; i++) { double xd = i / 200.0; - s32 fr = (s32)(xd * (1 << 15)); + s32 fr = tofix(xd, 15); + double actual_xd = frd(fr, 15); s32 rad = FR_acos(fr, 15, 16); - double ref_rad = acos(xd); - stats_add(&acos_stats, xd, frd(rad, 16), ref_rad); + double ref_rad = acos(actual_xd); + stats_add(&acos_stats, actual_xd, frd(rad, 16), ref_rad, M_PI); } table_header_stats(); table_row_stats("FR_acos vs acos() (rad)", &acos_stats); @@ -773,10 +810,11 @@ static void section_inverse_trig(void) { stats_reset(&asin_stats); for (int i = -200; i <= 200; i++) { double xd = i / 200.0; - s32 fr = (s32)(xd * (1 << 15)); + s32 fr = tofix(xd, 15); + double actual_xd = frd(fr, 15); s32 rad = FR_asin(fr, 15, 16); - double ref_rad = asin(xd); - stats_add(&asin_stats, xd, frd(rad, 16), ref_rad); + double ref_rad = asin(actual_xd); + stats_add(&asin_stats, actual_xd, frd(rad, 16), ref_rad, M_PI); } table_header_stats(); table_row_stats("FR_asin vs asin() (rad)", &asin_stats); @@ -812,13 +850,13 @@ static void section_pow_log(void) { stats_t pow2_stats; stats_reset(&pow2_stats); for (int i = 0; i < (int)(sizeof(pow2_inputs)/sizeof(pow2_inputs[0])); i++) { double x = pow2_inputs[i]; - s32 fr = (s32)(x * (1L << 16)); + s32 fr = tofix(x, 16); s32 r = FR_pow2(fr, 16); double rd = frd(r, 16); double ref = pow(2.0, x); double err = rd - ref; if (err < 0) err = -err; double rel = ref != 0.0 ? err / fabs(ref) : err; - stats_add(&pow2_stats, x, rd, ref); + stats_add(&pow2_stats, x, rd, ref, pow(2.0, 8.0)); printf("| %.4g | %ld | %.6g | %.6g | %.4g | %.4g |\n", x, (long)r, rd, ref, err, rel); } @@ -831,11 +869,12 @@ static void section_pow_log(void) { stats_t pow2_fine; stats_reset(&pow2_fine); for (int i = -800; i <= 800; i++) { double x = i / 100.0; - s32 fr = (s32)(x * (1L << 16)); + s32 fr = tofix(x, 16); + double actual_x = frd(fr, 16); s32 r = FR_pow2(fr, 16); double rd = frd(r, 16); - double ref = pow(2.0, x); - stats_add(&pow2_fine, x, rd, ref); + double ref = pow(2.0, actual_x); + stats_add(&pow2_fine, actual_x, rd, ref, pow(2.0, 8.0)); } table_header_stats(); table_row_stats("FR_pow2 [-8,8] step 0.01", &pow2_fine); @@ -870,7 +909,7 @@ static void section_pow_log(void) { printf("| %ld | %u | %u | %ld | %.6g | %.6g |\n", (long)log2_cases[i].in, log2_cases[i].r, log2_cases[i].or_, (long)r, rd, log2_cases[i].ref); - stats_add(&log2_stats, (double)log2_cases[i].in, rd, log2_cases[i].ref); + stats_add(&log2_stats, (double)log2_cases[i].in, rd, log2_cases[i].ref, log2(32000.0)); } printf("\n"); table_header_stats(); @@ -883,11 +922,11 @@ static void section_pow_log(void) { double ln_inputs[] = {1, 2, M_E, 4, 8, 10, 100, 1000}; stats_t ln_stats; stats_reset(&ln_stats); for (int i = 0; i < (int)(sizeof(ln_inputs)/sizeof(ln_inputs[0])); i++) { - s32 fr = (s32)(ln_inputs[i] * (1L << 16)); + s32 fr = tofix(ln_inputs[i], 16); s32 r = FR_ln(fr, 16, 16); double rd = frd(r, 16); double ref = log(ln_inputs[i]); - stats_add(&ln_stats, ln_inputs[i], rd, ref); + stats_add(&ln_stats, ln_inputs[i], rd, ref, log(32000.0)); printf("| %.4g | %ld | %.6g | %.6g |\n", ln_inputs[i], (long)r, rd, ref); } printf("\n"); @@ -900,11 +939,11 @@ static void section_pow_log(void) { double log10_inputs[] = {1, 2, 5, 10, 100, 1000, 10000}; stats_t log10_stats; stats_reset(&log10_stats); for (int i = 0; i < (int)(sizeof(log10_inputs)/sizeof(log10_inputs[0])); i++) { - s32 fr = (s32)(log10_inputs[i] * (1L << 16)); + s32 fr = tofix(log10_inputs[i], 16); s32 r = FR_log10(fr, 16, 16); double rd = frd(r, 16); double ref = log10(log10_inputs[i]); - stats_add(&log10_stats, log10_inputs[i], rd, ref); + stats_add(&log10_stats, log10_inputs[i], rd, ref, log10(32000.0)); printf("| %.4g | %ld | %.6g | %.6g |\n", log10_inputs[i], (long)r, rd, ref); } printf("\n"); @@ -915,14 +954,14 @@ static void section_pow_log(void) { md_h3("8.6 FR_EXP and FR_POW10 macros (wrap FR_pow2)"); printf("| Expression | Result | as double | Reference | Note |\n|---|---:|---:|---:|---|\n"); { - s32 in = (s32)(1.0 * (1L << 16)); + s32 in = tofix(1.0, 16); s32 r = FR_EXP(in, 16); double rd = frd(r, 16); printf("| FR_EXP(1.0,16) | %ld | %.6g | %.6g | exp(1) = e |\n", (long)r, rd, M_E); } { - s32 in = (s32)(2.0 * (1L << 16)); + s32 in = tofix(2.0, 16); s32 r = FR_POW10(in, 16); double rd = frd(r, 16); printf("| FR_POW10(2.0,16) | %ld | %.6g | %.6g | 10^2 = 100 |\n", @@ -1251,14 +1290,15 @@ static void section_v2_new(void) { stats_t sqrt_stats; stats_reset(&sqrt_stats); for (int i = 0; i < (int)(sizeof(sqrt_inputs)/sizeof(sqrt_inputs[0])); i++) { double x = sqrt_inputs[i]; - s32 fr = (s32)(x * (1L << 16)); + s32 fr = tofix(x, 16); + double actual_x = frd(fr, 16); s32 r = FR_sqrt(fr, 16); double rd = frd(r, 16); - double ref = sqrt(x); + double ref = sqrt(actual_x); double err = rd - ref; if (err < 0) err = -err; - stats_add(&sqrt_stats, x, rd, ref); + stats_add(&sqrt_stats, actual_x, rd, ref, sqrt(32000.0)); printf("| %.6g | %ld | %.6g | %.6g | %.4g |\n", - x, (long)r, rd, ref, err); + actual_x, (long)r, rd, ref, err); } printf("\n"); table_header_stats(); @@ -1269,11 +1309,12 @@ static void section_v2_new(void) { stats_t sqrt_fine; stats_reset(&sqrt_fine); for (int i = 1; i <= 1000; i++) { double x = i * 10.0; /* 10..10000 */ - s32 fr = (s32)(x * (1L << 16)); + s32 fr = tofix(x, 16); + double actual_x = frd(fr, 16); s32 r = FR_sqrt(fr, 16); double rd = frd(r, 16); - double ref = sqrt(x); - stats_add(&sqrt_fine, x, rd, ref); + double ref = sqrt(actual_x); + stats_add(&sqrt_fine, actual_x, rd, ref, sqrt(32000.0)); } table_header_stats(); table_row_stats("FR_sqrt [10,10000]", &sqrt_fine); @@ -1299,16 +1340,16 @@ static void section_v2_new(void) { }; stats_t hyp_stats; stats_reset(&hyp_stats); for (int i = 0; i < (int)(sizeof(hyp_cases)/sizeof(hyp_cases[0])); i++) { - s32 fx = (s32)(hyp_cases[i].x * (1L << 16)); - s32 fy = (s32)(hyp_cases[i].y * (1L << 16)); + s32 fx = tofix(hyp_cases[i].x, 16); + s32 fy = tofix(hyp_cases[i].y, 16); + double actual_x = frd(fx, 16), actual_y = frd(fy, 16); s32 r = FR_hypot(fx, fy, 16); double rd = frd(r, 16); - double ref = hypot(hyp_cases[i].x, hyp_cases[i].y); + double ref = hypot(actual_x, actual_y); double err = rd - ref; if (err < 0) err = -err; - stats_add(&hyp_stats, sqrt(hyp_cases[i].x*hyp_cases[i].x + hyp_cases[i].y*hyp_cases[i].y), - rd, ref); + stats_add(&hyp_stats, ref, rd, ref, hypot(1000.0, 1000.0)); printf("| %g | %g | %ld | %.6g | %.6g | %.4g |\n", - hyp_cases[i].x, hyp_cases[i].y, (long)r, rd, ref, err); + actual_x, actual_y, (long)r, rd, ref, err); } printf("\n"); table_header_stats(); @@ -1320,17 +1361,17 @@ static void section_v2_new(void) { printf("|---:|---:|---:|---:|---:|---:|---:|\n"); stats_t hf8_stats; stats_reset(&hf8_stats); for (int i = 0; i < (int)(sizeof(hyp_cases)/sizeof(hyp_cases[0])); i++) { - s32 fx = (s32)(hyp_cases[i].x * (1L << 16)); - s32 fy = (s32)(hyp_cases[i].y * (1L << 16)); + s32 fx = tofix(hyp_cases[i].x, 16); + s32 fy = tofix(hyp_cases[i].y, 16); + double actual_x = frd(fx, 16), actual_y = frd(fy, 16); s32 r = FR_hypot_fast8(fx, fy); double rd = frd(r, 16); - double ref = hypot(hyp_cases[i].x, hyp_cases[i].y); + double ref = hypot(actual_x, actual_y); double err = rd - ref; if (err < 0) err = -err; double rel = (ref > 0) ? err / ref * 100.0 : 0.0; - stats_add(&hf8_stats, sqrt(hyp_cases[i].x*hyp_cases[i].x + hyp_cases[i].y*hyp_cases[i].y), - rd, ref); + stats_add(&hf8_stats, ref, rd, ref, hypot(1000.0, 1000.0)); printf("| %g | %g | %ld | %.6g | %.6g | %.4g | %.4g |\n", - hyp_cases[i].x, hyp_cases[i].y, (long)r, rd, ref, err, rel); + actual_x, actual_y, (long)r, rd, ref, err, rel); } printf("\n"); table_header_stats(); @@ -1386,7 +1427,7 @@ static void section_v2_new(void) { else if (t < 0.50) ideal = 2.0 - 4.0 * t; /* 1 → 0 */ else if (t < 0.75) ideal = -4.0 * (t - 0.5); /* 0 → -1 */ else ideal = -1.0 + 4.0 * (t - 0.75); /* -1 → 0 */ - stats_add(&tri_stats, t * 360.0, (double)actual / 32767.0, ideal); + stats_add(&tri_stats, t * 360.0, (double)actual / 32767.0, ideal, 1.0); } table_header_stats(); table_row_stats("fr_wave_tri vs ideal", &tri_stats); @@ -1472,8 +1513,8 @@ static void section_multiradix(void) { int log2_radixes[] = {8, 12, 16, 24}; for (int ri = 0; ri < 4; ri++) { int R = log2_radixes[ri]; - double scale = (double)(1L << R); - double max_val = (double)((1L << (30 - R))); /* stay well within s32 */ + double scale = ldexp(1.0, R); + double max_val = ldexp(1.0, 30 - R); /* stay well within s32 */ stats_t st; stats_reset(&st); /* Sweep from 0.125 to max representable value */ @@ -1484,24 +1525,26 @@ static void section_multiradix(void) { for (int i = 0; i < ninp; i++) { if (inputs[i] > max_val) continue; /* would overflow s32 */ - s32 fr = (s32)(inputs[i] * scale); + s32 fr = tofix(inputs[i], R); if (fr <= 0) continue; + double actual_x = frd(fr, R); s32 r = FR_log2(fr, (u16)R, (u16)R); double rd = frd(r, R); - double ref = log2(inputs[i]); - stats_add(&st, inputs[i], rd, ref); + double ref = log2(actual_x); + stats_add(&st, actual_x, rd, ref, log2(32000.0)); } /* Fine-grained sweep in [1, min(100, max_val)] */ double sweep_max = max_val < 100.0 ? max_val : 100.0; for (int i = 1; i <= 500; i++) { double x = 1.0 + ((sweep_max - 1.0) * i / 500.0); - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); if (fr <= 0) continue; + double actual_x = frd(fr, R); s32 r = FR_log2(fr, (u16)R, (u16)R); double rd = frd(r, R); - double ref = log2(x); - stats_add(&st, x, rd, ref); + double ref = log2(actual_x); + stats_add(&st, actual_x, rd, ref, log2(32000.0)); } double lsb = 1.0 / scale; @@ -1521,19 +1564,20 @@ static void section_multiradix(void) { for (int ri = 0; ri < 4; ri++) { int R = log2_radixes[ri]; - double scale = (double)(1L << R); - double max_val = (double)((1L << (30 - R))); + double scale = ldexp(1.0, R); + double max_val = ldexp(1.0, 30 - R); double sweep_max = max_val < 100.0 ? max_val : 100.0; stats_t st; stats_reset(&st); for (int i = 1; i <= 500; i++) { double x = 0.5 + ((sweep_max - 0.5) * i / 500.0); - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); if (fr <= 0) continue; + double actual_x = frd(fr, R); s32 r = FR_ln(fr, (u16)R, (u16)R); double rd = frd(r, R); - double ref = log(x); - stats_add(&st, x, rd, ref); + double ref = log(actual_x); + stats_add(&st, actual_x, rd, ref, log(32000.0)); } double lsb = 1.0 / scale; @@ -1553,19 +1597,20 @@ static void section_multiradix(void) { for (int ri = 0; ri < 4; ri++) { int R = log2_radixes[ri]; - double scale = (double)(1L << R); - double max_val = (double)((1L << (30 - R))); + double scale = ldexp(1.0, R); + double max_val = ldexp(1.0, 30 - R); double sweep_max = max_val < 1000.0 ? max_val : 1000.0; stats_t st; stats_reset(&st); for (int i = 1; i <= 500; i++) { double x = 0.5 + ((sweep_max - 0.5) * i / 500.0); - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); if (fr <= 0) continue; + double actual_x = frd(fr, R); s32 r = FR_log10(fr, (u16)R, (u16)R); double rd = frd(r, R); - double ref = log10(x); - stats_add(&st, x, rd, ref); + double ref = log10(actual_x); + stats_add(&st, actual_x, rd, ref, log10(32000.0)); } double lsb = 1.0 / scale; @@ -1586,8 +1631,8 @@ static void section_multiradix(void) { int div_radixes[] = {8, 12, 16, 20}; for (int ri = 0; ri < 4; ri++) { int R = div_radixes[ri]; - double scale = (double)(1L << R); - double max_val = (double)(1L << (30 - R)); /* stay within s32 */ + double scale = ldexp(1.0, R); + double max_val = ldexp(1.0, 30 - R); /* stay within s32 */ stats_t st_rnd, st_trunc; stats_reset(&st_rnd); stats_reset(&st_trunc); @@ -1606,18 +1651,18 @@ static void section_multiradix(void) { double aq = ay > 0 ? ax / ay : 1e30; /* Skip if inputs or quotient would overflow s32 at this radix */ if (ax >= max_val || ay >= max_val || aq >= max_val) continue; - s32 xfp = (s32)(x * scale); - s32 yfp = (s32)(y * scale); + s32 xfp = tofix(x, R); + s32 yfp = tofix(y, R); if (yfp == 0) continue; - double ref = x / y; + double ref = frd(xfp, R) / frd(yfp, R); s32 d_rnd = FR_DIV(xfp, R, yfp, R); s32 d_trunc = FR_DIV_TRUNC(xfp, R, yfp, R); double rd_rnd = frd(d_rnd, R); double rd_trunc = frd(d_trunc, R); - stats_add(&st_rnd, x / y, rd_rnd, ref); - stats_add(&st_trunc, x / y, rd_trunc, ref); + stats_add(&st_rnd, x / y, rd_rnd, ref, 1.0); + stats_add(&st_trunc, x / y, rd_trunc, ref, 1.0); } } @@ -1644,9 +1689,9 @@ static void section_multiradix(void) { }; for (int i = 0; i < (int)(sizeof(sign_cases)/sizeof(sign_cases[0])); i++) { int R = sign_cases[i].r; - double scale = (double)(1L << R); - s32 xfp = (s32)(sign_cases[i].x * scale); - s32 yfp = (s32)(sign_cases[i].y * scale); + double scale = ldexp(1.0, R); + s32 xfp = tofix(sign_cases[i].x, R); + s32 yfp = tofix(sign_cases[i].y, R); s32 d = FR_DIV(xfp, R, yfp, R); double rd = frd(d, R); double ref = sign_cases[i].x / sign_cases[i].y; @@ -1669,7 +1714,7 @@ static void section_multiradix(void) { int exp_radixes[] = {8, 12, 16, 20}; for (int ri = 0; ri < 4; ri++) { int R = exp_radixes[ri]; - double scale = (double)(1L << R); + double scale = ldexp(1.0, R); stats_t st_exp, st_pow10; stats_reset(&st_exp); stats_reset(&st_pow10); @@ -1677,23 +1722,25 @@ static void section_multiradix(void) { /* Sweep exp(x) for x in [-4, 4] in steps of 0.05 */ for (int i = -80; i <= 80; i++) { double x = i / 20.0; - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); + double actual_x = frd(fr, R); s32 r = FR_EXP(fr, R); double rd = frd(r, R); - double ref = exp(x); - if (r != FR_OVERFLOW_POS && ref < (double)(1L << (31 - R))) - stats_add(&st_exp, x, rd, ref); + double ref = exp(actual_x); + if (r != FR_OVERFLOW_POS && ref < ldexp(1.0, 31 - R)) + stats_add(&st_exp, actual_x, rd, ref, 32000.0); } /* Sweep pow10(x) for x in [-2, 2] in steps of 0.05 */ for (int i = -40; i <= 40; i++) { double x = i / 20.0; - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); + double actual_x = frd(fr, R); s32 r = FR_POW10(fr, R); double rd = frd(r, R); - double ref = pow(10.0, x); - if (r != FR_OVERFLOW_POS && ref < (double)(1L << (31 - R))) - stats_add(&st_pow10, x, rd, ref); + double ref = pow(10.0, actual_x); + if (r != FR_OVERFLOW_POS && ref < ldexp(1.0, 31 - R)) + stats_add(&st_pow10, actual_x, rd, ref, 32000.0); } double lsb = 1.0 / scale; @@ -1725,10 +1772,11 @@ static void section_summary(void) { printf("| FR_FixMulSat | OK | 4.2, 4.3 | int64 fast path with round-to-nearest and explicit saturation |\n"); printf("| FR_FixAddSat | OK | 4.4, 4.5 | Saturation behaves identically on LP64 host and ILP32 MCU |\n"); printf("| FR_CosI / FR_SinI | OK | 5 | s15.16 output; exact at poles; max abs error ~1.5e-5 (1 LSB s15.16) over [-720, +720]; macros routing to fr_*_bam |\n"); - printf("| FR_TanI (integer degrees) | OK | 5.1, 5.2 | Routed through BAM trig |\n"); + printf("| FR_TanI (integer degrees) | OK | 5.1, 5.2 | BAM table lookup; 65-entry octant table; no 64-bit division |\n"); printf("| FR_Cos / FR_Sin (interpolated) | OK | 6.1 | Within LSB-level error for r8 inputs in s16 |\n"); - printf("| FR_Tan (interpolated) | OK | 6.2 | Locals are s32 |\n"); + printf("| FR_Tan (interpolated) | OK | 6.2 | Via fr_tan_bam; 65-entry octant table |\n"); printf("| fr_cos / fr_sin / fr_cos_bam / fr_sin_bam / fr_cos_deg / fr_sin_deg | OK | 6 | s15.16 output; 129-entry quadrant table with round-to-nearest linear interp; exact at cardinal angles |\n"); + printf("| fr_tan_bam | OK | 14 | 65-entry octant table; first-octant lerp, second-octant 32-bit reciprocal; no 64-bit |\n"); printf("| FR_acos | OK | 7.1 | Max error ~0.83° over [-1, +1] swept at 200 points |\n"); printf("| FR_asin | OK | 7.2 | Same precision as FR_acos |\n"); printf("| FR_atan2 | OK | 7.3 | Via asin/acos + hypot_fast8; 129-entry cos table; `FR_atan2(y, x, out_radix)` returns radians |\n"); @@ -1779,68 +1827,193 @@ static void section_summary(void) { * README.md, docs/README.md, and pages/index.html. * ============================================================ */ +/* ── Neighborhood printer ────────────────────────────────────────── + * Print ±K samples around a center index for any trig sweep. + * func_type selects the function to evaluate: + * 0 = fr_sin_bam 1 = fr_cos_bam 2 = fr_tan_bam + * 3 = fr_sin 4 = fr_cos 5 = fr_tan + * 6 = FR_SinI 7 = FR_CosI 8 = FR_TanI + * 9 = fr_sin_deg 10 = fr_cos_deg 11 = fr_tan_deg + */ +static void neighborhood(const char *label, int func_type, + int center_i, int half, int N, + double range_lo, double range_hi) +{ + printf("\n**Neighborhood: %s (center i=%d ±%d)**\n\n", label, center_i, half); + printf("| i | deg | input_fp | expected | got | abs_err | pct_err |\n"); + printf("|---|---|---|---|---|---|---|\n"); + + for (int k = -half; k <= half; k++) { + int i = (center_i + k % N + N) % N; + double deg, angle, exp_v, got_v; + s32 fp; + + switch (func_type) { + case 0: case 1: case 2: { /* BAM: 0..65535 */ + u16 bam = (u16)i; + deg = bam * 360.0 / 65536.0; + angle = deg * M_PI / 180.0; + if (func_type == 0) { exp_v = q16(sin(angle)); got_v = frd(fr_sin_bam(bam), 16); } + else if (func_type == 1) { exp_v = q16(cos(angle)); got_v = frd(fr_cos_bam(bam), 16); } + else { exp_v = q16(tan_ref(angle)); got_v = frd(fr_tan_bam(bam), 16); } + fp = (s32)bam; + break; + } + case 3: case 4: case 5: { /* radian: ±2π, 131072 pts */ + angle = range_lo + (range_hi - range_lo) * i / (double)N; + fp = tofix(angle, 16); + double actual_angle = frd(fp, 16); + deg = actual_angle * 180.0 / M_PI; + if (func_type == 3) { exp_v = q16(sin(actual_angle)); got_v = frd(fr_sin(fp, 16), 16); } + else if (func_type == 4) { exp_v = q16(cos(actual_angle)); got_v = frd(fr_cos(fp, 16), 16); } + else { exp_v = q16(tan_ref(actual_angle)); got_v = frd(fr_tan(fp, 16), 16); } + break; + } + case 6: case 7: case 8: { /* integer degrees */ + int d = (int)range_lo + i; + deg = (double)d; + angle = d * M_PI / 180.0; + fp = (s32)d; + if (func_type == 6) { exp_v = q16(sin(angle)); got_v = frd(FR_SinI(d), 16); } + else if (func_type == 7) { exp_v = q16(cos(angle)); got_v = frd(FR_CosI(d), 16); } + else { exp_v = q16(tan_ref(angle)); got_v = frd(FR_TanI((s16)d), 16); } + break; + } + default: { /* fixed-radix degrees: ±360, 131072 pts */ + deg = range_lo + (range_hi - range_lo) * i / (double)N; + fp = tofix(deg, 16); + double actual_deg = frd(fp, 16); + angle = actual_deg * M_PI / 180.0; + if (func_type == 9) { exp_v = q16(sin(angle)); got_v = frd(FR_Sin(fp, 16), 16); } + else if (func_type == 10) { exp_v = q16(cos(angle)); got_v = frd(FR_Cos(fp, 16), 16); } + else { exp_v = q16(tan_ref(angle)); got_v = frd(FR_Tan(fp, 16), 16); } + break; + } + } + + double ae = fabs(got_v - exp_v); + double pe = (exp_v != 0.0) ? ae / fabs(exp_v) * 100.0 : (ae != 0.0 ? 100.0 : 0.0); + printf("| %d | %.6f | %d | %.6f | %.6f | %.6f | %.4f%% |\n", + i, deg, (int)fp, exp_v, got_v, ae, pe); + } + printf("\n"); +} + static void section_accuracy_table(void) { md_h2("14. Accuracy Summary Table"); printf("\n"); if (g_showpeak) { - printf("| Function | Max err (%%) | Avg err (%%) | Note | Peak at |\n"); + printf("| Function | Max err (%%)*| Avg err (%%) | Note | Peak at |\n"); printf("|---|---:|---:|---|---:|\n"); } else { - printf("| Function | Max err (%%) | Avg err (%%) | Note |\n"); + printf("| Function | Max err (%%)*| Avg err (%%) | Note |\n"); printf("|---|---:|---:|---|\n"); } const int R = 16; - const double scale = (double)(1L << R); + const double scale = ldexp(1.0, R); /* Persistent stats so we can print diagnostics after the table */ stats_t st_sincos, st_tan, st_asincos, st_atan2; + stats_t st_rad2bam, st_deg2bam, st_sincos_deg_s32, st_tan_deg_s32; stats_reset(&st_sincos); stats_reset(&st_tan); stats_reset(&st_asincos); stats_reset(&st_atan2); + stats_reset(&st_rad2bam); stats_reset(&st_deg2bam); + stats_reset(&st_sincos_deg_s32); stats_reset(&st_tan_deg_s32); - /* --- sin / cos --- */ + /* --- sin / cos (BAM native: 65536-pt) --- */ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 65536; i++) { + u16 bam = (u16)i; + double rad = bam * 2.0 * M_PI / 65536.0; + stats_add(&st, (double)bam, frd(fr_sin_bam(bam), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0); + stats_add(&st, (double)bam, frd(fr_cos_bam(bam), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0); + } + acc_row("sin/cos (BAM)", &st, "very fast binary angle trig"); + } + + /* --- sin / cos (degree wrappers: 65536-pt at s15.16) --- */ { stats_t &st = st_sincos; - const u16 radix = 7; /* s8.7 degrees: 128 steps/deg, [-256°,+256°) */ - /* 65536-point sweep: all s16 values at radix 7 cover > full circle */ - for (int i = -32768; i <= 32767; i++) { - double deg = (double)i / (1 << radix); - double rad = deg * M_PI / 180.0; - stats_add(&st, deg, frd(FR_Sin((s16)i, radix), FR_TRIG_OUT_PREC), sin(rad)); - stats_add(&st, deg, frd(FR_Cos((s16)i, radix), FR_TRIG_OUT_PREC), cos(rad)); + const u16 radix = 16; + for (int i = 0; i < 65536; i++) { + double deg = -360.0 + (720.0 * i / 65536.0); + s32 deg_fp = tofix(deg, radix); + double actual_deg = frd(deg_fp, radix); + double rad = actual_deg * M_PI / 180.0; + stats_add(&st, actual_deg, frd(FR_Sin(deg_fp, radix), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0); + stats_add(&st, actual_deg, frd(FR_Cos(deg_fp, radix), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0); } - /* Special cases: exact integer degrees including negative */ s16 specials[] = {0,30,45,60,90,120,135,150,180,210,225,240,270,300,315,330,360, -30,-45,-60,-90,-120,-135,-150,-180,-210,-225,-240,-270,-300,-315,-330,-360}; for (int si = 0; si < (int)(sizeof(specials)/sizeof(specials[0])); si++) { s16 d = specials[si]; double rad = d * M_PI / 180.0; - stats_add(&st, d, frd(FR_SinI(d), FR_TRIG_OUT_PREC), sin(rad)); - stats_add(&st, d, frd(FR_CosI(d), FR_TRIG_OUT_PREC), cos(rad)); + stats_add(&st, d, frd(FR_SinI(d), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0); + stats_add(&st, d, frd(FR_CosI(d), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0); + } + acc_row("sin/cos (deg)", &st, "degree input trig fns"); + } + + /* --- sin / cos (radian wrappers: 65536-pt) --- */ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 65536; i++) { + double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0); + s32 rad_fp = tofix(angle, 16); + double actual_angle = frd(rad_fp, 16); + stats_add(&st, actual_angle, frd(fr_sin(rad_fp, 16), FR_TRIG_OUT_PREC), q16(sin(actual_angle)), 1.0); + stats_add(&st, actual_angle, frd(fr_cos(rad_fp, 16), FR_TRIG_OUT_PREC), q16(cos(actual_angle)), 1.0); + } + acc_row("sin/cos (rad)", &st, "radian (traditional) trig"); + } + + /* --- tan (BAM native: 65536-pt, full sweep) --- */ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 65536; i++) { + u16 bam = (u16)i; + double ref; + if (bam == 16384) ref = TAN_CLAMP; /* 90°: +maxint */ + else if (bam == 49152) ref = -TAN_CLAMP; /* 270°: -maxint */ + else ref = tan_ref(bam * 2.0 * M_PI / 65536.0); + stats_add(&st, (double)bam, frd(fr_tan_bam(bam), FR_TRIG_OUT_PREC), q16(ref), TAN_CLAMP); } - acc_row("sin / cos", &st, "65536-pt sweep + specials"); + acc_row("tan (BAM)", &st, "binary angle tangent; ±maxint at poles"); } - /* --- tan --- */ + /* --- tan (degree wrappers: 65536-pt at s15.16, full sweep) --- */ { stats_t &st = st_tan; - const u16 radix = 7; - for (int i = -32768; i <= 32767; i++) { - double deg = (double)i / (1 << radix); - double rad = deg * M_PI / 180.0; - /* Skip near poles: |cos| < 0.01 → tan > 100 */ - if (fabs(cos(rad)) < 0.01) continue; - stats_add(&st, deg, frd(FR_Tan((s16)i, radix), FR_TRIG_OUT_PREC), tan(rad)); + const u16 radix = 16; + for (int i = 0; i < 65536; i++) { + double deg = -360.0 + (720.0 * i / 65536.0); + s32 deg_fp = tofix(deg, radix); + double actual_deg = frd(deg_fp, radix); + double rad = actual_deg * M_PI / 180.0; + stats_add(&st, actual_deg, frd(FR_Tan(deg_fp, radix), FR_TRIG_OUT_PREC), q16(tan_ref(rad)), TAN_CLAMP); } - /* Special cases: integer degrees (avoiding poles) */ s16 specials[] = {0,30,45,60,-30,-45,-60,120,135,150,-120,-135,-150}; for (int si = 0; si < (int)(sizeof(specials)/sizeof(specials[0])); si++) { s16 d = specials[si]; double rad = d * M_PI / 180.0; - stats_add(&st, d, frd(FR_TanI(d), FR_TRIG_OUT_PREC), tan(rad)); + stats_add(&st, d, frd(FR_TanI(d), FR_TRIG_OUT_PREC), q16(tan_ref(rad)), TAN_CLAMP); } - acc_row("tan", &st, "65536-pt sweep (skip poles)"); + acc_row("tan (deg)", &st, "degree input tangent; saturated at poles"); + } + + /* --- tan (radian wrappers: 65536-pt, full sweep) --- */ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 65536; i++) { + double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0); + s32 rad_fp = tofix(angle, 16); + double actual_angle = frd(rad_fp, 16); + stats_add(&st, actual_angle, frd(fr_tan(rad_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(actual_angle)), TAN_CLAMP); + } + acc_row("tan (rad)", &st, "radian (traditional) tangent"); } /* --- asin / acos --- */ @@ -1848,14 +2021,14 @@ static void section_accuracy_table(void) { stats_t &st = st_asincos; /* 65536-point sweep: all representable values at radix 15 over [-1, +1) */ for (int i = -32768; i <= 32767; i++) { - double xd = (double)i / (1 << 15); + double xd = (double)i / 32768.0; if (xd < -1.0 || xd > 1.0) continue; s32 rad = FR_asin((s32)i, 15, R); - stats_add(&st, xd, frd(rad, R), asin(xd)); + stats_add(&st, xd, frd(rad, R), q16(asin(xd)), M_PI); rad = FR_acos((s32)i, 15, R); - stats_add(&st, xd, frd(rad, R), acos(xd)); + stats_add(&st, xd, frd(rad, R), q16(acos(xd)), M_PI); } - acc_row("asin / acos", &st, "65536-pt; sqrt approx near boundary"); + acc_row("asin / acos", &st, "reverse trig, radian output"); } /* --- atan2 --- */ @@ -1874,19 +2047,19 @@ static void section_accuracy_table(void) { for (int i = -32767; i <= 32768; i++) { double angle = i * M_PI / 32768.0; double x = rad * cos(angle), y = rad * sin(angle); - s32 fx = (s32)(x * scale); - s32 fy = (s32)(y * scale); + s32 fx = tofix(x, R); + s32 fy = tofix(y, R); if (fx == 0 && fy == 0) continue; s32 afx = (fx < 0) ? -fx : fx; s32 afy = (fy < 0) ? -fy : fy; s32 minor = (afx < afy) ? afx : afy; if (minor < 256) continue; /* input quantization, not algo */ s32 r = FR_atan2(fy, fx, R); - double ref = atan2(y, x); + double ref = atan2((double)fy, (double)fx); /* Skip near ±pi branch cut: sign depends on sub-LSB * input quantization, not algorithm accuracy. */ if (fabs(fabs(ref) - M_PI) < 0.01) continue; - stats_add(&st, angle * 180.0 / M_PI, frd(r, R), ref); + stats_add(&st, angle * 180.0 / M_PI, frd(r, R), q16(ref), M_PI); } } /* Special cases: exact quadrant/octant/30-degree angles */ @@ -1895,29 +2068,26 @@ static void section_accuracy_table(void) { for (int si = 0; si < (int)(sizeof(specials_deg)/sizeof(specials_deg[0])); si++) { double angle = specials_deg[si] * M_PI / 180.0; double x = 100.0 * cos(angle), y = 100.0 * sin(angle); - s32 fx = (s32)(x * scale), fy = (s32)(y * scale); + s32 fx = tofix(x, R), fy = tofix(y, R); if (fx == 0 && fy == 0) continue; s32 r = FR_atan2(fy, fx, R); - stats_add(&st, specials_deg[si], frd(r, R), atan2(y, x)); + stats_add(&st, specials_deg[si], frd(r, R), q16(atan2((double)fy, (double)fx)), M_PI); } - acc_row("atan2", &st, "65536x5 radii; asin/acos+hypot_fast8"); + acc_row("atan2", &st, "reverse tangent, always safe"); } /* --- atan --- */ { stats_t st; stats_reset(&st); - /* Sweep atan(x) for x in [-10, 10] with fine steps near zero. - * FR_atan(input, radix, out_radix) calls FR_atan2(input, 1< 32000.0 || ref < 1e-6) continue; /* skip overflow/underflow */ - stats_add(&st, x, frd(r, R), ref); + stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0); } - acc_row("exp", &st, "FR_MULK28 + FR_pow2"); + acc_row("exp", &st, "shift/add only for speed"); } /* --- exp_fast (FR_EXP_FAST) --- */ @@ -2009,11 +2186,12 @@ static void section_accuracy_table(void) { stats_t st; stats_reset(&st); for (int i = -400; i <= 400; i++) { double x = i / 100.0; - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); + double actual_x = frd(fr, R); s32 r = FR_EXP_FAST(fr, R); - double ref = exp(x); + double ref = exp(actual_x); if (ref > 32000.0 || ref < 1e-6) continue; - stats_add(&st, x, frd(r, R), ref); + stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0); } acc_row("exp_fast", &st, "Shift-only scaling"); } @@ -2023,13 +2201,14 @@ static void section_accuracy_table(void) { stats_t st; stats_reset(&st); for (int i = -200; i <= 200; i++) { double x = i / 100.0; - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); + double actual_x = frd(fr, R); s32 r = FR_POW10(fr, R); - double ref = pow(10.0, x); + double ref = pow(10.0, actual_x); if (ref > 32000.0 || ref < 1e-6) continue; - stats_add(&st, x, frd(r, R), ref); + stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0); } - acc_row("pow10", &st, "FR_MULK28 + FR_pow2"); + acc_row("pow10", &st, "shift/add only for speed"); } /* --- pow10_fast (FR_POW10_FAST) --- */ @@ -2037,11 +2216,12 @@ static void section_accuracy_table(void) { stats_t st; stats_reset(&st); for (int i = -200; i <= 200; i++) { double x = i / 100.0; - s32 fr = (s32)(x * scale); + s32 fr = tofix(x, R); + double actual_x = frd(fr, R); s32 r = FR_POW10_FAST(fr, R); - double ref = pow(10.0, x); + double ref = pow(10.0, actual_x); if (ref > 32000.0 || ref < 1e-6) continue; - stats_add(&st, x, frd(r, R), ref); + stats_add(&st, actual_x, frd(r, R), q16(ref), 32000.0); } acc_row("pow10_fast", &st, "Shift-only scaling"); } @@ -2054,13 +2234,14 @@ static void section_accuracy_table(void) { {1,1},{0.5,0.5},{100,100},{1000,1},{1,1000} }; for (int i = 0; i < (int)(sizeof(cases)/sizeof(cases[0])); i++) { - s32 fx = (s32)(cases[i].x * scale); - s32 fy = (s32)(cases[i].y * scale); + s32 fx = tofix(cases[i].x, R); + s32 fy = tofix(cases[i].y, R); + double actual_x = frd(fx, R), actual_y = frd(fy, R); s32 r = FR_hypot(fx, fy, R); - double ref = hypot(cases[i].x, cases[i].y); - stats_add(&st, ref, frd(r, R), ref); + double ref = hypot(actual_x, actual_y); + stats_add(&st, ref, frd(r, R), q16(ref), hypot(1000.0, 1000.0)); } - acc_row("hypot (exact)", &st, "64-bit intermediate"); + acc_row("hypot (exact)", &st, "Uses 64-bit intermediate"); } /* --- hypot_fast8 (8-seg) --- */ @@ -2071,16 +2252,186 @@ static void section_accuracy_table(void) { {100,100},{1000,1},{1,1000},{7,24},{20,21} }; for (int i = 0; i < (int)(sizeof(cases)/sizeof(cases[0])); i++) { - s32 fx = (s32)(cases[i].x * scale); - s32 fy = (s32)(cases[i].y * scale); + s32 fx = tofix(cases[i].x, R); + s32 fy = tofix(cases[i].y, R); + double actual_x = frd(fx, R), actual_y = frd(fy, R); s32 r = FR_hypot_fast8(fx, fy); - double ref = hypot(cases[i].x, cases[i].y); - if (ref > 0) stats_add(&st, ref, frd(r, R), ref); + double ref = hypot(actual_x, actual_y); + if (ref > 0) stats_add(&st, ref, frd(r, R), q16(ref), hypot(1000.0, 1000.0)); } acc_row("hypot_fast8 (8-seg)", &st, "Shift-only, no multiply"); } printf("\n"); + printf("\n*Relative error; reference clamped to 1%% of full-scale output.\n\n"); + + /* ── Test-only rows (not library functions — conversion & pipeline checks) ── */ + md_h3("14.0.1 Conversion & pipeline accuracy (test-only)"); + printf("| Function | Max err (%%) | Avg err (%%) | Note |\n"); + printf("|---|---:|---:|---|\n"); + + /* --- rad→BAM conversion (standalone: 65536-pt) --- */ + { + stats_t &st = st_rad2bam; + for (int i = 0; i < 65536; i++) { + double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0); + s32 rad_fp = tofix(angle, R); + u16 got = fr_rad_to_bam(rad_fp, 16); + /* Exact BAM: wrap to u16 */ + double exact_bam_d = angle * 65536.0 / (2.0 * M_PI); + s32 exact_bam_s = (s32)floor(exact_bam_d + 0.5); + u16 expected = (u16)(exact_bam_s & 0xFFFF); + /* Feed stats as degrees so the error is interpretable */ + double got_deg = got * (360.0 / 65536.0); + double exp_deg = expected * (360.0 / 65536.0); + stats_add(&st, angle, got_deg, exp_deg, 360.0); + } + { + char note[128]; + snprintf(note, sizeof(note), + "fr_rad_to_bam() ±2π at r16; max %d BAM LSB", + (int)(st.max_abs_err / (360.0 / 65536.0) + 0.5)); + acc_row("rad→BAM conv", &st, note); + } + } + + /* --- deg→BAM conversion (standalone: 65536-pt) --- */ + { + stats_t &st = st_deg2bam; + for (int i = 0; i < 65536; i++) { + double deg = -360.0 + (720.0 * i / 65536.0); + s32 deg_fp = tofix(deg, R); + u16 got = fr_deg_to_bam(deg_fp, 16); + /* Exact BAM: wrap to u16 */ + double exact_bam_d = deg * 65536.0 / 360.0; + s32 exact_bam_s = (s32)floor(exact_bam_d + 0.5); + u16 expected = (u16)(exact_bam_s & 0xFFFF); + double got_deg = got * (360.0 / 65536.0); + double exp_deg = expected * (360.0 / 65536.0); + stats_add(&st, deg, got_deg, exp_deg, 360.0); + } + { + char note[128]; + snprintf(note, sizeof(note), + "fr_deg_to_bam() ±360° at r16; max %d BAM LSB", + (int)(st.max_abs_err / (360.0 / 65536.0) + 0.5)); + acc_row("deg→BAM conv", &st, note); + } + } + + /* --- sin / cos via integer degrees ±360° --- */ + { + stats_t &st = st_sincos_deg_s32; + for (int deg = -360; deg <= 360; deg++) { + double rad = deg * M_PI / 180.0; + stats_add(&st, (double)deg, frd(FR_SinI(deg), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0); + stats_add(&st, (double)deg, frd(FR_CosI(deg), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0); + } + acc_row("sin/cos (int deg)", &st, "FR_SinI/FR_CosI ±360° integer degrees"); + } + + /* --- tan via integer degrees ±360° --- */ + { + stats_t &st = st_tan_deg_s32; + for (int deg = -360; deg <= 360; deg++) { + double rad = deg * M_PI / 180.0; + stats_add(&st, (double)deg, frd(FR_TanI((s16)deg), FR_TRIG_OUT_PREC), q16(tan_ref(rad)), TAN_CLAMP); + } + acc_row("tan (int deg)", &st, "FR_TanI ±360° full; sat at poles"); + } + + /* --- Conversion macro accuracy (all 6 direction macros) --- */ + + /* FR_RAD2BAM macro: test within safe range (±pi at r16) */ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 65536; i++) { + double angle = -M_PI + (2.0 * M_PI * i / 65536.0); + s32 rad_fp = tofix(angle, R); + s32 raw = FR_RAD2BAM(rad_fp); + u16 got = (u16)((raw + (1 << 15)) >> 16); + double exact_d = angle * 65536.0 / (2.0 * M_PI); + u16 expected = (u16)((s32)floor(exact_d + 0.5) & 0xFFFF); + double got_deg = got * (360.0 / 65536.0); + double exp_deg = expected * (360.0 / 65536.0); + stats_add(&st, angle, got_deg, exp_deg, 360.0); + } + acc_row("FR_RAD2BAM macro", &st, "Shift-approx ±π at r16; overflows beyond ±4 rad"); + } + + /* FR_DEG2BAM macro: test within safe range (±180° at r7) */ + { + stats_t st; stats_reset(&st); + const u16 radix = 7; + for (int i = -23040; i <= 23040; i++) { /* ±180° at r7 = ±23040 */ + double deg = (double)i / 128.0; + s32 raw = FR_DEG2BAM((s32)i); + u16 got = (u16)((raw + (1 << (radix - 1))) >> radix); + double exact_d = deg * 65536.0 / 360.0; + u16 expected = (u16)((s32)floor(exact_d + 0.5) & 0xFFFF); + double got_deg = got * (360.0 / 65536.0); + double exp_deg = expected * (360.0 / 65536.0); + stats_add(&st, deg, got_deg, exp_deg, 360.0); + } + acc_row("FR_DEG2BAM macro", &st, "Shift-approx ±180° at r7; overflows beyond ±256°"); + } + + /* FR_BAM2RAD macro: multiplies by 2π/65536 using shifts. + * BAM 0..32767 at r16 (upper half overflows s32 when <<16). */ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 32768; i++) { + s32 bam_r16 = (s32)i << 16; + s32 rad_fp = FR_BAM2RAD(bam_r16); + double got_rad = frd(rad_fp, 16); + double exp_rad = (double)i * 2.0 * M_PI / 65536.0; + stats_add(&st, (double)i, got_rad, exp_rad, 2.0 * M_PI); + } + acc_row("FR_BAM2RAD macro", &st, "BAM→rad r16 full (0..32767; <<16 overflow above)"); + } + + /* FR_BAM2DEG macro: multiplies by 360/65536 using shifts. + * BAM 0..32767 at r16 (same s32 overflow limit). */ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 32768; i++) { + s32 bam_r16 = (s32)i << 16; + s32 deg_fp = FR_BAM2DEG(bam_r16); + double got_deg = frd(deg_fp, 16); + double exp_deg = (double)i * 360.0 / 65536.0; + stats_add(&st, (double)i, got_deg, exp_deg, 360.0); + } + acc_row("FR_BAM2DEG macro", &st, "BAM→deg r16 full (0..32767; <<16 overflow above)"); + } + + /* FR_DEG2RAD macro: 65536-pt ±360° at r16 full */ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 65536; i++) { + double deg = -360.0 + (720.0 * i / 65536.0); + s32 deg_fp = tofix(deg, R); + s32 rad_fp = FR_DEG2RAD(deg_fp); + double got_rad = frd(rad_fp, 16); + double exp_rad = deg * M_PI / 180.0; + stats_add(&st, deg, got_rad, exp_rad, 2.0 * M_PI); + } + acc_row("FR_DEG2RAD macro", &st, "65536-pt ±360° r16 full"); + } + + /* FR_RAD2DEG macro: 65536-pt ±2π at r16 full */ + { + stats_t st; stats_reset(&st); + for (int i = 0; i < 65536; i++) { + double angle = -2.0 * M_PI + (4.0 * M_PI * i / 65536.0); + s32 rad_fp = tofix(angle, R); + s32 deg_fp = FR_RAD2DEG(rad_fp); + double got_deg = frd(deg_fp, 16); + double exp_deg = angle * 180.0 / M_PI; + stats_add(&st, angle, got_deg, exp_deg, 360.0); + } + acc_row("FR_RAD2DEG macro", &st, "65536-pt ±2π r16 full"); + } + printf("\n"); /* Diagnostic: show where each trig function's worst % error occurs */ @@ -2092,10 +2443,14 @@ static void section_accuracy_table(void) { printf("|---|---|---:|---:|---:|---:|\n"); struct { const char *name; stats_t *s; } diag[] = { - {"sin / cos", &st_sincos}, - {"tan", &st_tan}, - {"asin/acos", &st_asincos}, - {"atan2", &st_atan2}, + {"sin / cos", &st_sincos}, + {"tan", &st_tan}, + {"rad→BAM conv", &st_rad2bam}, + {"deg→BAM conv", &st_deg2bam}, + {"sin/cos (int deg)",&st_sincos_deg_s32}, + {"tan (int deg)", &st_tan_deg_s32}, + {"asin/acos", &st_asincos}, + {"atan2", &st_atan2}, }; for (int d = 0; d < (int)(sizeof(diag)/sizeof(diag[0])); d++) { stats_t *s = diag[d].s; @@ -2106,6 +2461,325 @@ static void section_accuracy_table(void) { s->max_pct_err); } printf("\n"); + + /* ── 14.3 Per-function trig sweep table ──────────────────────────── + * One row per public entry point. Each function is swept + * independently over its full domain so that peak abs / pct errors + * are attributable to a single function, not a combined aggregate. + * + * Peak pct err is raw |err|/|expected|*100 — no clamping. Near + * zero crossings (sin≈0, cos≈0, asin(0)≈0) the denominator is + * tiny and pct blows up even when abs err is sub-LSB. The Notes + * column flags these rows. Use Peak abs err and Mean abs err to + * judge accuracy at zero crossings; use Peak pct err elsewhere. + */ + md_h3("14.2 Neighborhoods (peak error ±10 samples)"); + + /* fr_sin radian at i=0 (-360°) — zero crossing neighborhood */ + neighborhood("fr_sin radian @ -360 deg (i=0)", 3, 0, 10, 131072, + -2.0 * M_PI, 2.0 * M_PI); + + md_h3("14.3 Per-function trig sweep"); + + printf("| Function | Input | Range start | Range end | Points | Increment | " + "Peak abs err | @abs_err | Peak pct err | @pct_err | Expected | Got | Mean abs err | Notes |\n"); + printf("|---|---|---:|---:|---:|---|---:|---:|---:|---:|---:|---:|---:|---|\n"); + + /* Helper: print one row of the per-function table */ + #define SWEEP_ROW(name, sig, rlo, rhi, pts, step, st, note) \ + printf("| %s | %s | %s | %s | %d | %s | %f | %.4f | %.4f%% | %.4f | %f | %f | %f | %s |\n", \ + name, sig, rlo, rhi, pts, step, \ + (st).max_abs_err, (st).worst_input, (st).max_pct_err, \ + (st).worst_pct_input, (st).worst_pct_expected, (st).worst_pct_actual, \ + stats_mean(&(st)), note) + + /* fr_sin_bam */ + { + stats_t st; stats_reset(&st); + for (int b = 0; b < 65536; b++) { + u16 bam = (u16)b; + double rad = bam * 2.0 * M_PI / 65536.0; + double deg = bam * 360.0 / 65536.0; + stats_add(&st, deg, frd(fr_sin_bam(bam), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0); + } + SWEEP_ROW("fr_sin_bam", "(u16 bam)", "0", "360", 65536, "0.0055 deg", st, ""); + } + /* fr_cos_bam */ + { + stats_t st; stats_reset(&st); + for (int b = 0; b < 65536; b++) { + u16 bam = (u16)b; + double rad = bam * 2.0 * M_PI / 65536.0; + double deg = bam * 360.0 / 65536.0; + stats_add(&st, deg, frd(fr_cos_bam(bam), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0); + } + SWEEP_ROW("fr_cos_bam", "(u16 bam)", "0", "360", 65536, "0.0055 deg", st, ""); + } + /* fr_tan_bam */ + { + stats_t st; stats_reset(&st); + for (int b = 0; b < 65536; b++) { + u16 bam = (u16)b; + double rad = bam * 2.0 * M_PI / 65536.0; + double deg = bam * 360.0 / 65536.0; + double ref; + if (bam == 16384) ref = TAN_CLAMP; + else if (bam == 49152) ref = -TAN_CLAMP; + else ref = q16(tan_ref(rad)); + stats_add(&st, deg, frd(fr_tan_bam(bam), FR_TRIG_OUT_PREC), ref, TAN_CLAMP); + } + SWEEP_ROW("fr_tan_bam", "(u16 bam)", "0", "360", 65536, "0.0055 deg", st, "pole clamped"); + } + /* fr_sin (radian) */ + { + stats_t st; stats_reset(&st); + const int N2 = 131072; + for (int i = 0; i < N2; i++) { + double angle = -2.0 * M_PI + (4.0 * M_PI * i / (double)N2); + s32 rad_fp = tofix(angle, 16); + double actual_angle = frd(rad_fp, 16); + double deg = actual_angle * 180.0 / M_PI; + stats_add(&st, deg, frd(fr_sin(rad_fp, 16), FR_TRIG_OUT_PREC), q16(sin(actual_angle)), 1.0); + } + SWEEP_ROW("fr_sin", "(s32 rad, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "near-π small-angle bypass"); + } + /* fr_cos (radian) */ + { + stats_t st; stats_reset(&st); + const int N2 = 131072; + for (int i = 0; i < N2; i++) { + double angle = -2.0 * M_PI + (4.0 * M_PI * i / (double)N2); + s32 rad_fp = tofix(angle, 16); + double actual_angle = frd(rad_fp, 16); + double deg = actual_angle * 180.0 / M_PI; + stats_add(&st, deg, frd(fr_cos(rad_fp, 16), FR_TRIG_OUT_PREC), q16(cos(actual_angle)), 1.0); + } + SWEEP_ROW("fr_cos", "(s32 rad, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, ""); + } + /* fr_tan (radian) */ + { + stats_t st; stats_reset(&st); + const int N2 = 131072; + for (int i = 0; i < N2; i++) { + double angle = -2.0 * M_PI + (4.0 * M_PI * i / (double)N2); + s32 rad_fp = tofix(angle, 16); + double actual_angle = frd(rad_fp, 16); + double deg = actual_angle * 180.0 / M_PI; + stats_add(&st, deg, frd(fr_tan(rad_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(actual_angle)), TAN_CLAMP); + } + SWEEP_ROW("fr_tan", "(s32 rad, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "sign extract + small-angle bypass at 0/pi/2pi; r24 cot(d)~1/d near poles; BAM table elsewhere"); + } + /* FR_SinI */ + { + stats_t st; stats_reset(&st); + for (int d = -360; d <= 360; d++) { + double rad = d * M_PI / 180.0; + stats_add(&st, (double)d, frd(FR_SinI(d), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0); + } + SWEEP_ROW("FR_SinI", "(s16 deg)", "-360", "+360", 721, "1 deg", st, ""); + } + /* FR_CosI */ + { + stats_t st; stats_reset(&st); + for (int d = -360; d <= 360; d++) { + double rad = d * M_PI / 180.0; + stats_add(&st, (double)d, frd(FR_CosI(d), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0); + } + SWEEP_ROW("FR_CosI", "(s16 deg)", "-360", "+360", 721, "1 deg", st, ""); + } + /* FR_TanI */ + { + stats_t st; stats_reset(&st); + for (int d = -360; d <= 360; d++) { + double rad = d * M_PI / 180.0; + double ref; + if (d % 180 == 90 || d % 180 == -90) + ref = (d > 0) ? TAN_CLAMP : -TAN_CLAMP; + else + ref = q16(tan_ref(rad)); + stats_add(&st, (double)d, frd(FR_TanI((s16)d), FR_TRIG_OUT_PREC), ref, TAN_CLAMP); + } + SWEEP_ROW("FR_TanI", "(s16 deg)", "-360", "+360", 721, "1 deg", st, "pole clamped"); + } + /* fr_sin_deg (fixed-radix degrees, radix 16) */ + { + stats_t st; stats_reset(&st); + const int N2 = 131072; + for (int i = 0; i < N2; i++) { + double deg = -360.0 + 720.0 * i / (double)N2; + s32 deg_fp = tofix(deg, 16); + double actual_deg = frd(deg_fp, 16); + double rad = actual_deg * M_PI / 180.0; + stats_add(&st, actual_deg, frd(FR_Sin(deg_fp, 16), FR_TRIG_OUT_PREC), q16(sin(rad)), 1.0); + } + SWEEP_ROW("fr_sin_deg", "(s32 deg, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "pct peak at sin=0 crossing"); + } + /* fr_cos_deg (fixed-radix degrees, radix 16) */ + { + stats_t st; stats_reset(&st); + const int N2 = 131072; + for (int i = 0; i < N2; i++) { + double deg = -360.0 + 720.0 * i / (double)N2; + s32 deg_fp = tofix(deg, 16); + double actual_deg = frd(deg_fp, 16); + double rad = actual_deg * M_PI / 180.0; + stats_add(&st, actual_deg, frd(FR_Cos(deg_fp, 16), FR_TRIG_OUT_PREC), q16(cos(rad)), 1.0); + } + SWEEP_ROW("fr_cos_deg", "(s32 deg, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "near-90/270 small-angle bypass"); + } + /* fr_tan_deg (fixed-radix degrees, radix 16) */ + { + stats_t st; stats_reset(&st); + const int N2 = 131072; + for (int i = 0; i < N2; i++) { + double deg = -360.0 + 720.0 * i / (double)N2; + s32 deg_fp = tofix(deg, 16); + double actual_deg = frd(deg_fp, 16); + double rad = actual_deg * M_PI / 180.0; + stats_add(&st, actual_deg, frd(FR_Tan(deg_fp, 16), FR_TRIG_OUT_PREC), q16(tan_ref(rad)), TAN_CLAMP); + } + SWEEP_ROW("fr_tan_deg", "(s32 deg, u16 radix)", "-360", "+360", 131072, "0.0055 deg", st, "pct peak near tan pole"); + } + + /* --- Inverse Trig --- */ + + /* FR_acos */ + { + stats_t st; stats_reset(&st); + const int N = 65537; + for (int i = 0; i < N; i++) { + double xd = -1.0 + 2.0 * i / (double)(N - 1); + s32 fr = tofix(xd, 15); + double actual_xd = frd(fr, 15); + s32 rad = FR_acos(fr, 15, 16); + stats_add(&st, actual_xd, frd(rad, 16), q16(acos(actual_xd)), M_PI); + } + SWEEP_ROW("FR_acos", "(s32,u16 15,u16 16)", "-1.0", "+1.0", N, "3.05e-5", st, "r15 in, r16 out"); + } + /* FR_asin */ + { + stats_t st; stats_reset(&st); + const int N = 65537; + for (int i = 0; i < N; i++) { + double xd = -1.0 + 2.0 * i / (double)(N - 1); + s32 fr = tofix(xd, 15); + double actual_xd = frd(fr, 15); + s32 rad = FR_asin(fr, 15, 16); + stats_add(&st, actual_xd, frd(rad, 16), q16(asin(actual_xd)), M_PI); + } + SWEEP_ROW("FR_asin", "(s32,u16 15,u16 16)", "-1.0", "+1.0", N, "3.05e-5", st, "r15 in, r16 out; pct peak at asin(0)=0"); + } + /* FR_atan */ + { + stats_t st; stats_reset(&st); + const int N = 131072; + for (int i = 0; i < N; i++) { + double xd = -10.0 + 20.0 * i / (double)N; + s32 fr = tofix(xd, 16); + double actual_xd = frd(fr, 16); + s32 rad = FR_atan(fr, 16, 16); + stats_add(&st, actual_xd, frd(rad, 16), q16(atan(actual_xd)), M_PI / 2.0); + } + SWEEP_ROW("FR_atan", "(s32,u16 16,u16 16)", "-10.0", "+10.0", N, "1.53e-4", st, "r16 in/out"); + } + /* FR_atan2 — unit circle sweep */ + { + stats_t st; stats_reset(&st); + const int N = 65536; + for (int i = 0; i < N; i++) { + double angle = -M_PI + 2.0 * M_PI * i / (double)N; + double deg = angle * 180.0 / M_PI; + s32 x = tofix(cos(angle), 15); + s32 y = tofix(sin(angle), 15); + s32 rad = FR_atan2(y, x, 16); + stats_add(&st, deg, frd(rad, 16), q16(atan2((double)y, (double)x)), M_PI); + } + SWEEP_ROW("FR_atan2", "(s32 y,s32 x,u16 16)", "-180", "+180", N, "0.0055 deg", st, "unit circle r15"); + } + + /* --- Log / Exp --- */ + + /* FR_log2 */ + { + stats_t st; stats_reset(&st); + const int N = 65536; + for (int i = 1; i <= N; i++) { + double xd = 0.01 + (256.0 - 0.01) * i / (double)N; + s32 fr = tofix(xd, 16); + double actual_xd = frd(fr, 16); + s32 r = FR_log2(fr, 16, 16); + stats_add(&st, actual_xd, frd(r, 16), q16(log2(actual_xd)), log2(32000.0)); + } + SWEEP_ROW("FR_log2", "(s32,u16 16,u16 16)", "0.01", "256", N, "0.0039", st, "r16 in/out"); + } + /* FR_ln */ + { + stats_t st; stats_reset(&st); + const int N = 65536; + for (int i = 1; i <= N; i++) { + double xd = 0.01 + (256.0 - 0.01) * i / (double)N; + s32 fr = tofix(xd, 16); + double actual_xd = frd(fr, 16); + s32 r = FR_ln(fr, 16, 16); + stats_add(&st, actual_xd, frd(r, 16), q16(log(actual_xd)), log(32000.0)); + } + SWEEP_ROW("FR_ln", "(s32,u16 16,u16 16)", "0.01", "256", N, "0.0039", st, "r16 in/out"); + } + /* FR_log10 */ + { + stats_t st; stats_reset(&st); + const int N = 65536; + for (int i = 1; i <= N; i++) { + double xd = 0.01 + (256.0 - 0.01) * i / (double)N; + s32 fr = tofix(xd, 16); + double actual_xd = frd(fr, 16); + s32 r = FR_log10(fr, 16, 16); + stats_add(&st, actual_xd, frd(r, 16), q16(log10(actual_xd)), log10(32000.0)); + } + SWEEP_ROW("FR_log10", "(s32,u16 16,u16 16)", "0.01", "256", N, "0.0039", st, "r16 in/out"); + } + /* FR_pow2 */ + { + stats_t st; stats_reset(&st); + const int N = 65536; + for (int i = 0; i < N; i++) { + double xd = -8.0 + 16.0 * i / (double)N; + s32 fr = tofix(xd, 16); + double actual_xd = frd(fr, 16); + s32 r = FR_pow2(fr, 16); + stats_add(&st, actual_xd, frd(r, 16), q16(pow(2.0, actual_xd)), pow(2.0, 8.0)); + } + SWEEP_ROW("FR_pow2", "(s32,u16 16)", "-8.0", "+8.0", N, "2.44e-4", st, "r16 in/out"); + } + /* FR_EXP (macro wrapping FR_pow2) */ + { + stats_t st; stats_reset(&st); + const int N = 65536; + for (int i = 0; i < N; i++) { + double xd = -5.0 + 15.0 * i / (double)N; + s32 fr = tofix(xd, 16); + double actual_xd = frd(fr, 16); + s32 r = FR_EXP(fr, 16); + stats_add(&st, actual_xd, frd(r, 16), q16(exp(actual_xd)), 32000.0); + } + SWEEP_ROW("FR_EXP", "(s32,u16 16)", "-5.0", "+10.0", N, "2.29e-4", st, "macro, wraps FR_pow2"); + } + /* FR_POW10 (macro wrapping FR_pow2) */ + { + stats_t st; stats_reset(&st); + const int N = 65536; + for (int i = 0; i < N; i++) { + double xd = -2.0 + 6.0 * i / (double)N; + s32 fr = tofix(xd, 16); + double actual_xd = frd(fr, 16); + s32 r = FR_POW10(fr, 16); + stats_add(&st, actual_xd, frd(r, 16), q16(pow(10.0, actual_xd)), 32000.0); + } + SWEEP_ROW("FR_POW10", "(s32,u16 16)", "-2.0", "+4.0", N, "9.15e-5", st, "macro, wraps FR_pow2"); + } + + #undef SWEEP_ROW + printf("\n"); } int main(void) { @@ -2131,4 +2805,4 @@ int main(void) { section_accuracy_table(); return 0; -} +} \ No newline at end of file diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 0000000..29d8ac7 --- /dev/null +++ b/tools/README.md @@ -0,0 +1,131 @@ +# FR_Math Tools + +Diagnostic and code-generation utilities for the FR_Math library. + +## trig_neighborhood + +Sweep any math function over a range and print a neighborhood table showing +raw output, expected reference, absolute error, and percent error. + +**Build:** `make tools` + +**Usage:** +``` +trig_neighborhood
      [options] +``` + +### Supported functions (25) + +| Category | Functions | +|---|---| +| Trig (degrees) | `fr_sin_bam`, `fr_cos_bam`, `fr_tan_bam`, `fr_sin`, `fr_cos`, `fr_tan`, `FR_SinI`, `FR_CosI`, `FR_TanI`, `fr_sin_deg`, `fr_cos_deg`, `fr_tan_deg` | +| Inverse trig | `FR_acos`, `FR_asin`, `FR_atan`, `FR_atan2` | +| Logarithmic | `FR_log2`, `FR_ln`, `FR_log10` | +| Exponential | `FR_pow2`, `FR_EXP`, `FR_POW10` | +| Other | `FR_sqrt`, `FR_hypot`, `FR_hypot_fast8` | + +### Options + +| Option | Description | Default | +|---|---|---| +| `--inc ` | Increment per sample | function-dependent | +| `--fmt md\|csv\|ascii` | Output format | `md` | +| `--radix ` | Input radix for fixed-point | 16 | +| `--out_radix ` | Output radix (inv trig, log) | 16 | +| `--y ` | Fixed y for hypot functions | 0.0 | + +### Default increments + +- Trig + FR_atan2: `360/65536` (~0.0055 degrees) +- FR_acos, FR_asin: `1/32768` (~3.05e-5) +- All others: `1/65536` (~1.53e-5) + +### Examples + +```bash +# Cosine near -90 degrees +build/trig_neighborhood fr_cos -90 15 + +# Sine sweep in CSV format +build/trig_neighborhood fr_sin -360 10 --fmt csv + +# Tangent near pole +build/trig_neighborhood fr_tan 89.5 20 --inc 0.01 + +# Arcsine near zero +build/trig_neighborhood FR_asin 0.0001 15 --inc 3.05e-5 --radix 15 + +# Log2 near 1.0 +build/trig_neighborhood FR_log2 1.0 15 --inc 0.01 + +# Atan2 near 90 degrees +build/trig_neighborhood FR_atan2 90 15 + +# Hypot with y=50 +build/trig_neighborhood FR_hypot_fast8 100 15 --y 50 --radix 8 +``` + +--- + +## coef-gen.py + +Python script for generating power-of-two coefficient approximations. Given a +target floating-point value, searches for combinations of `+/- 2^(-k)` terms +that best approximate the value using only bit-shifts and adds. + +**Usage:** `python3 tools/coef-gen.py` + +--- + +## fr_coef-gen.cpp + +C++ coefficient generator for 32-bit host. Similar purpose to `coef-gen.py` +but runs natively and can be used for brute-force search over larger term +counts. + +**Build:** `g++ -O2 tools/fr_coef-gen.cpp -o build/fr_coef-gen` + +--- + +## gen_pow2_table.py + +Generates the `gFR_POW2_FRAC_TAB[65]` lookup table used by `FR_pow2()`. +Output is a C array suitable for inclusion in FR_math.c. + +**Usage:** `python3 tools/gen_pow2_table.py` + +--- + +## gen_radix28_constants.py + +Generates radix-28 constants used by FR_EXP, FR_ln, FR_log10 for base +conversion (e.g., `FR_kLOG2E_28`, `FR_kLOG2_10_28`). + +**Usage:** `python3 tools/gen_radix28_constants.py` + +--- + +## check_published_versions.sh + +Verifies that published version tags match the version defined in +`FR_math.h` (`FR_MATH_VERSION_HEX`). Used in CI/release workflows. + +**Usage:** `bash tools/check_published_versions.sh` + +--- + +## make_release.sh + +Release automation script. Bumps version, tags, and prepares release +artifacts. + +**Usage:** `bash tools/make_release.sh` + +--- + +## interp_analysis.html + +Interactive HTML/JS visualization for interpolation analysis. Open in a +browser to explore interpolation error characteristics. + +**Usage:** Open `tools/interp_analysis.html` in a web browser. diff --git a/tools/make_release.sh b/tools/make_release.sh index 28f7647..0ef8adb 100755 --- a/tools/make_release.sh +++ b/tools/make_release.sh @@ -112,7 +112,10 @@ do_sync_version() { echo "" echo " Running sync_version.sh to fix drift..." bash "${PROJECT_ROOT}/scripts/sync_version.sh" - git add -A + # Stage only the files sync_version.sh touches (not the whole tree). + git add src/FR_math.h VERSION README.md pages/version.json \ + src/FR_math_2D.h src/FR_math_2D.cpp \ + library.properties library.json idf_component.yml llms.txt pass "Version synced to $VER_STRING (changes staged)" else pass "All version strings match $VER_STRING" @@ -149,7 +152,7 @@ do_validate() { grep -E "Failed: [1-9]" "${test_log}" fail "Test failures detected" fi - TOTAL_PASSED=$(grep -Eo "Passed: [0-9]+" "${test_log}" | awk -F: '{sum+=$2} END {print sum}') + TOTAL_PASSED=$(grep -Eo "Passed: [0-9]+" "${test_log}" | awk -F: '{sum+=$2} END {print sum+0}') pass "${TOTAL_PASSED} tests passed." echo "" @@ -228,7 +231,7 @@ do_cross_compile() { # Files the pipeline itself may modify (badge update, version sync). # Anything outside this list is unexpected and should block the release. -PIPELINE_FILES="README.md VERSION src/FR_math.h library.properties library.json idf_component.yml llms.txt pages/assets/site.js src/FR_math_2D.h src/FR_math_2D.cpp docs/README.md pages/index.html" +PIPELINE_FILES="README.md VERSION src/FR_math.h library.properties library.json idf_component.yml llms.txt pages/version.json src/FR_math_2D.h src/FR_math_2D.cpp" do_commit_pipeline_changes() { step_header "Commit pipeline-generated changes" @@ -634,8 +637,8 @@ do_switch_master() { do_verify_master() { step_header "Verify build on master" - run_cmd make clean >/dev/null 2>&1 - run_cmd make test >/dev/null 2>&1 + make clean >/dev/null 2>&1 + make test >/dev/null 2>&1 pass "All tests pass on master." } diff --git a/tools/trig_neighborhood.cpp b/tools/trig_neighborhood.cpp new file mode 100644 index 0000000..4c275d8 --- /dev/null +++ b/tools/trig_neighborhood.cpp @@ -0,0 +1,536 @@ +/* + * trig_neighborhood.cpp — sweep any math function over a range, print neighborhood table + * + * Usage: + * trig_neighborhood
      [--inc ] [--fmt md|csv|ascii] + * [--radix ] [--out_radix ] [--y ] + * + * Trig functions: + * fr_sin_bam, fr_cos_bam, fr_tan_bam, + * fr_sin, fr_cos, fr_tan, + * FR_SinI, FR_CosI, FR_TanI, + * fr_sin_deg, fr_cos_deg, fr_tan_deg + * + * Inverse trig: + * FR_acos, FR_asin, FR_atan, FR_atan2 + * + * Logarithmic: + * FR_log2, FR_ln, FR_log10 + * + * Exponential: + * FR_pow2, FR_EXP, FR_POW10 + * + * Other: + * FR_sqrt, FR_hypot, FR_hypot_fast8 + * + * center: center value (degrees for trig/atan2, input value for others) + * half: number of samples on each side of center + * --inc: increment (default depends on function type) + * --fmt: output format: md (default), csv, ascii + * --radix: input radix for fixed-point functions (default: 16) + * --out_radix: output radix for inverse trig and log (default: 16) + * --y: fixed y value for FR_hypot / FR_hypot_fast8 (default: 0.0) + * + * Examples: + * trig_neighborhood fr_cos -90 15 + * trig_neighborhood fr_sin -360 10 --fmt csv + * trig_neighborhood fr_tan 89.5 20 --inc 0.01 + * trig_neighborhood fr_sin_deg 45 10 --radix 8 + * trig_neighborhood FR_asin 0.5 15 --radix 15 --out_radix 16 + * trig_neighborhood FR_log2 1.0 15 --inc 0.01 + * trig_neighborhood FR_atan2 90 15 + * trig_neighborhood FR_hypot_fast8 100 15 --y 50 --radix 8 + * + * Build: + * make tools + */ +#include +#include +#include +#include +#include "FR_math.h" + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +static double frd(s32 v, int p) { return (double)v / ldexp(1.0, p); } +static double qN(double v, int p) { double s = ldexp(1.0, p); return floor(v * s + 0.5) / s; } +/* Round-to-nearest float→fixed conversion (not truncation) */ +static s32 tofix(double v, int p) { return (s32)floor(ldexp(v, p) + 0.5); } +static const double TAN_CLAMP = (double)0x7fffffff / 65536.0; +static double tan_ref(double rad) { + double t = tan(rad); + if (t > TAN_CLAMP) return TAN_CLAMP; + if (t < -TAN_CLAMP) return -TAN_CLAMP; + return t; +} + +enum Func { + F_SIN_BAM, F_COS_BAM, F_TAN_BAM, + F_SIN, F_COS, F_TAN, + F_SINI, F_COSI, F_TANI, + F_SIN_DEG, F_COS_DEG, F_TAN_DEG, + F_ACOS, F_ASIN, F_ATAN, F_ATAN2, + F_LOG2, F_LN, F_LOG10, + F_POW2, F_EXP, F_POW10, + F_SQRT, F_HYPOT, F_HYPOT_FAST8, + F_UNKNOWN +}; + +enum Fmt { FMT_MD, FMT_CSV, FMT_ASCII }; + +static Func parse_func(const char *s) { + if (!strcmp(s, "fr_sin_bam")) return F_SIN_BAM; + if (!strcmp(s, "fr_cos_bam")) return F_COS_BAM; + if (!strcmp(s, "fr_tan_bam")) return F_TAN_BAM; + if (!strcmp(s, "fr_sin")) return F_SIN; + if (!strcmp(s, "fr_cos")) return F_COS; + if (!strcmp(s, "fr_tan")) return F_TAN; + if (!strcmp(s, "FR_SinI")) return F_SINI; + if (!strcmp(s, "FR_CosI")) return F_COSI; + if (!strcmp(s, "FR_TanI")) return F_TANI; + if (!strcmp(s, "fr_sin_deg")) return F_SIN_DEG; + if (!strcmp(s, "fr_cos_deg")) return F_COS_DEG; + if (!strcmp(s, "fr_tan_deg")) return F_TAN_DEG; + if (!strcmp(s, "FR_acos")) return F_ACOS; + if (!strcmp(s, "FR_asin")) return F_ASIN; + if (!strcmp(s, "FR_atan")) return F_ATAN; + if (!strcmp(s, "FR_atan2")) return F_ATAN2; + if (!strcmp(s, "FR_log2")) return F_LOG2; + if (!strcmp(s, "FR_ln")) return F_LN; + if (!strcmp(s, "FR_log10")) return F_LOG10; + if (!strcmp(s, "FR_pow2")) return F_POW2; + if (!strcmp(s, "FR_EXP")) return F_EXP; + if (!strcmp(s, "FR_POW10")) return F_POW10; + if (!strcmp(s, "FR_sqrt")) return F_SQRT; + if (!strcmp(s, "FR_hypot")) return F_HYPOT; + if (!strcmp(s, "FR_hypot_fast8")) return F_HYPOT_FAST8; + return F_UNKNOWN; +} + +static const char *func_name(Func f) { + switch (f) { + case F_SIN_BAM: return "fr_sin_bam"; + case F_COS_BAM: return "fr_cos_bam"; + case F_TAN_BAM: return "fr_tan_bam"; + case F_SIN: return "fr_sin"; + case F_COS: return "fr_cos"; + case F_TAN: return "fr_tan"; + case F_SINI: return "FR_SinI"; + case F_COSI: return "FR_CosI"; + case F_TANI: return "FR_TanI"; + case F_SIN_DEG: return "fr_sin_deg"; + case F_COS_DEG: return "fr_cos_deg"; + case F_TAN_DEG: return "fr_tan_deg"; + case F_ACOS: return "FR_acos"; + case F_ASIN: return "FR_asin"; + case F_ATAN: return "FR_atan"; + case F_ATAN2: return "FR_atan2"; + case F_LOG2: return "FR_log2"; + case F_LN: return "FR_ln"; + case F_LOG10: return "FR_log10"; + case F_POW2: return "FR_pow2"; + case F_EXP: return "FR_EXP"; + case F_POW10: return "FR_POW10"; + case F_SQRT: return "FR_sqrt"; + case F_HYPOT: return "FR_hypot"; + case F_HYPOT_FAST8: return "FR_hypot_fast8"; + default: return "?"; + } +} + +static int is_sin(Func f) { return f == F_SIN_BAM || f == F_SIN || f == F_SINI || f == F_SIN_DEG; } +static int is_cos(Func f) { return f == F_COS_BAM || f == F_COS || f == F_COSI || f == F_COS_DEG; } +static int is_trig(Func f) { return f <= F_TAN_DEG; } + +/* Evaluate function. Returns raw s32 result and sets input_fp, expected, out_prec. */ +static s32 eval(Func f, double val, int radix, int out_radix, + double y_val, s32 *input_fp, double *expected, int *out_prec) +{ + s32 raw = 0; + + /* --- Trig functions (val = degrees) --- */ + if (is_trig(f)) { + double rad = val * M_PI / 180.0; + *out_prec = 16; + + if (is_sin(f)) *expected = qN(sin(rad), 16); + else if (is_cos(f)) *expected = qN(cos(rad), 16); + else *expected = qN(tan_ref(rad), 16); + + switch (f) { + case F_SIN_BAM: { + u16 bam = (u16)((int)(val * 65536.0 / 360.0 + 0.5) & 0xFFFF); + *input_fp = (s32)bam; + raw = fr_sin_bam(bam); + break; + } + case F_COS_BAM: { + u16 bam = (u16)((int)(val * 65536.0 / 360.0 + 0.5) & 0xFFFF); + *input_fp = (s32)bam; + raw = fr_cos_bam(bam); + break; + } + case F_TAN_BAM: { + u16 bam = (u16)((int)(val * 65536.0 / 360.0 + 0.5) & 0xFFFF); + *input_fp = (s32)bam; + raw = fr_tan_bam(bam); + break; + } + case F_SIN: { + s32 rad_fp = tofix(rad, radix); + *input_fp = rad_fp; + double actual_rad = (double)rad_fp / (double)(1 << radix); + *expected = qN(sin(actual_rad), 16); + raw = fr_sin(rad_fp, (u16)radix); + break; + } + case F_COS: { + s32 rad_fp = tofix(rad, radix); + *input_fp = rad_fp; + double actual_rad = (double)rad_fp / (double)(1 << radix); + *expected = qN(cos(actual_rad), 16); + raw = fr_cos(rad_fp, (u16)radix); + break; + } + case F_TAN: { + s32 rad_fp = tofix(rad, radix); + *input_fp = rad_fp; + double actual_rad = (double)rad_fp / (double)(1 << radix); + *expected = qN(tan_ref(actual_rad), 16); + raw = fr_tan(rad_fp, (u16)radix); + break; + } + case F_SINI: + *input_fp = (s32)(int)val; + raw = FR_SinI((int)val); + break; + case F_COSI: + *input_fp = (s32)(int)val; + raw = FR_CosI((int)val); + break; + case F_TANI: + *input_fp = (s32)(int)val; + raw = FR_TanI((s16)(int)val); + break; + case F_SIN_DEG: { + s32 deg_fp = tofix(val, radix); + *input_fp = deg_fp; + double actual_deg = (double)deg_fp / (double)(1 << radix); + double actual_rad2 = actual_deg * M_PI / 180.0; + if (is_sin(f)) *expected = qN(sin(actual_rad2), 16); + else if (is_cos(f)) *expected = qN(cos(actual_rad2), 16); + else *expected = qN(tan_ref(actual_rad2), 16); + raw = fr_sin_deg(deg_fp, (u16)radix); + break; + } + case F_COS_DEG: { + s32 deg_fp = tofix(val, radix); + *input_fp = deg_fp; + double actual_deg = (double)deg_fp / (double)(1 << radix); + double actual_rad2 = actual_deg * M_PI / 180.0; + *expected = qN(cos(actual_rad2), 16); + raw = fr_cos_deg(deg_fp, (u16)radix); + break; + } + case F_TAN_DEG: { + s32 deg_fp = tofix(val, radix); + *input_fp = deg_fp; + double actual_deg = (double)deg_fp / (double)(1 << radix); + double actual_rad2 = actual_deg * M_PI / 180.0; + *expected = qN(tan_ref(actual_rad2), 16); + raw = fr_tan_deg(deg_fp, (u16)radix); + break; + } + default: + break; + } + return raw; + } + + /* --- Inverse trig (val = input value, not degrees) --- */ + if (f == F_ACOS || f == F_ASIN || f == F_ATAN) { + *out_prec = out_radix; + s32 inp = tofix(val, radix); + *input_fp = inp; + + switch (f) { + case F_ACOS: + raw = FR_acos(inp, (u16)radix, (u16)out_radix); + *expected = qN(acos(val), out_radix); + break; + case F_ASIN: + raw = FR_asin(inp, (u16)radix, (u16)out_radix); + *expected = qN(asin(val), out_radix); + break; + case F_ATAN: + raw = FR_atan(inp, (u16)radix, (u16)out_radix); + *expected = qN(atan(val), out_radix); + break; + default: + break; + } + return raw; + } + + /* --- FR_atan2 (val = degrees on unit circle) --- */ + if (f == F_ATAN2) { + *out_prec = out_radix; + double rad = val * M_PI / 180.0; + s32 x = tofix(cos(rad), 15); + s32 y = tofix(sin(rad), 15); + *input_fp = tofix(val, radix); + raw = FR_atan2(y, x, (u16)out_radix); + double ref = atan2((double)y, (double)x); + *expected = qN(ref, out_radix); + return raw; + } + + /* --- Log functions (val = input value) --- */ + if (f == F_LOG2 || f == F_LN || f == F_LOG10) { + *out_prec = out_radix; + s32 inp = tofix(val, radix); + *input_fp = inp; + + switch (f) { + case F_LOG2: + raw = FR_log2(inp, (u16)radix, (u16)out_radix); + *expected = (val > 0.0) ? qN(log2(val), out_radix) : 0.0; + break; + case F_LN: + raw = FR_ln(inp, (u16)radix, (u16)out_radix); + *expected = (val > 0.0) ? qN(log(val), out_radix) : 0.0; + break; + case F_LOG10: + raw = FR_log10(inp, (u16)radix, (u16)out_radix); + *expected = (val > 0.0) ? qN(log10(val), out_radix) : 0.0; + break; + default: + break; + } + return raw; + } + + /* --- Power/exp functions (val = exponent) --- */ + if (f == F_POW2 || f == F_EXP || f == F_POW10) { + *out_prec = radix; + s32 inp = tofix(val, radix); + *input_fp = inp; + + switch (f) { + case F_POW2: + raw = FR_pow2(inp, (u16)radix); + *expected = qN(pow(2.0, val), radix); + break; + case F_EXP: + raw = FR_EXP(inp, (u16)radix); + *expected = qN(exp(val), radix); + break; + case F_POW10: + raw = FR_POW10(inp, (u16)radix); + *expected = qN(pow(10.0, val), radix); + break; + default: + break; + } + return raw; + } + + /* --- FR_sqrt (val = input value) --- */ + if (f == F_SQRT) { + *out_prec = radix; + s32 inp = tofix(val, radix); + *input_fp = inp; + raw = FR_sqrt(inp, (u16)radix); + *expected = (val >= 0.0) ? qN(sqrt(val), radix) : 0.0; + return raw; + } + + /* --- FR_hypot / FR_hypot_fast8 (val = x, y_val = y) --- */ + if (f == F_HYPOT || f == F_HYPOT_FAST8) { + *out_prec = radix; + s32 x_fp = tofix(val, radix); + s32 y_fp = tofix(y_val, radix); + *input_fp = x_fp; + + if (f == F_HYPOT) + raw = FR_hypot(x_fp, y_fp, (u16)radix); + else + raw = FR_hypot_fast8(x_fp, y_fp); + + *expected = qN(hypot(val, y_val), radix); + return raw; + } + + /* fallback */ + *input_fp = 0; + *expected = 0.0; + *out_prec = 16; + return 0; +} + +/* Smart default increment based on function type */ +static double default_inc(Func f) { + if (is_trig(f) || f == F_ATAN2) + return 360.0 / 65536.0; /* ~0.0055 degrees */ + if (f == F_ACOS || f == F_ASIN) + return 1.0 / 32768.0; /* ~3.05e-5, matches r15 LSB */ + return 1.0 / 65536.0; /* ~1.53e-5, matches r16 LSB */ +} + +static void usage(void) { + fprintf(stderr, + "Usage: trig_neighborhood
      [options]\n" + "\n" + "Supported functions:\n" + "\n" + " Trig (input: degrees):\n" + " fr_sin_bam, fr_cos_bam, fr_tan_bam\n" + " fr_sin, fr_cos, fr_tan\n" + " FR_SinI, FR_CosI, FR_TanI\n" + " fr_sin_deg, fr_cos_deg, fr_tan_deg\n" + "\n" + " Inverse trig (input: value):\n" + " FR_acos, FR_asin, FR_atan\n" + "\n" + " Inverse trig (input: degrees on unit circle):\n" + " FR_atan2\n" + "\n" + " Logarithmic (input: value):\n" + " FR_log2, FR_ln, FR_log10\n" + "\n" + " Exponential (input: exponent):\n" + " FR_pow2, FR_EXP, FR_POW10\n" + "\n" + " Other:\n" + " FR_sqrt (input: value)\n" + " FR_hypot, FR_hypot_fast8 (input: x, --y for y)\n" + "\n" + " center: center of sweep (degrees for trig/atan2, value otherwise)\n" + " half: number of samples each side of center\n" + "\n" + "Options:\n" + " --inc increment (default depends on function)\n" + " --fmt md|csv|ascii output format (default: md)\n" + " --radix input radix for fixed-point (default: 16)\n" + " --out_radix output radix for inv trig/log (default: 16)\n" + " --y fixed y value for hypot functions (default: 0.0)\n" + "\n" + "Examples:\n" + " trig_neighborhood fr_cos -90 15\n" + " trig_neighborhood fr_sin -360 10 --fmt csv\n" + " trig_neighborhood fr_tan 89.5 20 --inc 0.01\n" + " trig_neighborhood fr_sin_deg 45 10 --radix 8\n" + " trig_neighborhood FR_asin 0.5 15 --radix 15 --out_radix 16\n" + " trig_neighborhood FR_log2 1.0 15 --inc 0.01\n" + " trig_neighborhood FR_atan2 90 15\n" + " trig_neighborhood FR_hypot_fast8 100 15 --y 50 --radix 8\n" + ); +} + +int main(int argc, char **argv) { + if (argc < 4) { usage(); return 1; } + + Func func = parse_func(argv[1]); + if (func == F_UNKNOWN) { + fprintf(stderr, "Unknown function: %s\n", argv[1]); + usage(); + return 1; + } + + double center = atof(argv[2]); + int half = atoi(argv[3]); + double inc = -1.0; /* sentinel: use default */ + Fmt fmt = FMT_MD; + int radix = 16; + int out_radix = 16; + double y_val = 0.0; + + for (int i = 4; i < argc; i++) { + if (!strcmp(argv[i], "--inc") && i + 1 < argc) + inc = atof(argv[++i]); + else if (!strcmp(argv[i], "--fmt") && i + 1 < argc) { + i++; + if (!strcmp(argv[i], "csv")) fmt = FMT_CSV; + else if (!strcmp(argv[i], "ascii")) fmt = FMT_ASCII; + else fmt = FMT_MD; + } + else if (!strcmp(argv[i], "--radix") && i + 1 < argc) + radix = atoi(argv[++i]); + else if (!strcmp(argv[i], "--out_radix") && i + 1 < argc) + out_radix = atoi(argv[++i]); + else if (!strcmp(argv[i], "--y") && i + 1 < argc) + y_val = atof(argv[++i]); + } + + if (inc < 0.0) inc = default_inc(func); + + const char *cols[] = {"sample", "val", "input_fp", "radix", "raw_got", "raw_exp", "expected", "got", "abs_err", "pct_err"}; + int ncols = 10; + + switch (fmt) { + case FMT_CSV: + for (int c = 0; c < ncols; c++) + printf("%s%s", cols[c], c < ncols - 1 ? "," : "\n"); + break; + case FMT_MD: + printf("**%s** center=%.6f, +/-%d samples, inc=%.6g, radix=%d", + func_name(func), center, half, inc, radix); + if (out_radix != radix) + printf(", out_radix=%d", out_radix); + if (func == F_HYPOT || func == F_HYPOT_FAST8) + printf(", y=%.6f", y_val); + printf("\n\n"); + printf("|"); + for (int c = 0; c < ncols; c++) printf(" %s |", cols[c]); + printf("\n|"); + for (int c = 0; c < ncols; c++) printf("---|"); + printf("\n"); + break; + case FMT_ASCII: + printf("# %s center=%.6f +/-%d inc=%.6g radix=%d", + func_name(func), center, half, inc, radix); + if (out_radix != radix) + printf(" out_radix=%d", out_radix); + if (func == F_HYPOT || func == F_HYPOT_FAST8) + printf(" y=%.6f", y_val); + printf("\n"); + printf("%8s %12s %12s %6s %10s %10s %12s %12s %12s %12s\n", + cols[0], cols[1], cols[2], cols[3], cols[4], cols[5], cols[6], cols[7], cols[8], cols[9]); + printf("%8s %12s %12s %6s %10s %10s %12s %12s %12s %12s\n", + "--------", "------------", "------------", "------", + "----------", "----------", + "------------", "------------", "------------", "------------"); + break; + } + + for (int k = -half; k <= half; k++) { + double val = center + k * inc; + s32 input_fp; + double expected; + int out_prec; + s32 raw = eval(func, val, radix, out_radix, y_val, &input_fp, &expected, &out_prec); + s32 raw_exp = (s32)floor(ldexp(expected, out_prec) + 0.5); + double got = frd(raw, out_prec); + double ae = fabs(got - expected); + double pe = (expected != 0.0) ? ae / fabs(expected) * 100.0 : (ae != 0.0 ? 100.0 : 0.0); + + switch (fmt) { + case FMT_CSV: + printf("%d,%.6g,%d,%d,%d,%d,%.6f,%.6f,%.6f,%.4f%%\n", + k, val, input_fp, radix, raw, raw_exp, expected, got, ae, pe); + break; + case FMT_MD: + printf("| %d | %.6g | %d | %d | %d | %d | %.6f | %.6f | %.6f | %.4f%% |\n", + k, val, input_fp, radix, raw, raw_exp, expected, got, ae, pe); + break; + case FMT_ASCII: + printf("%8d %12.6g %12d %6d %10d %10d %12.6f %12.6f %12.6f %11.4f%%\n", + k, val, input_fp, radix, raw, raw_exp, expected, got, ae, pe); + break; + } + } + + return 0; +}