diff --git a/.gitignore b/.gitignore
index 929ddf3..bc083e6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@
 .vscode/
 scratch*
 .ipynb_checkpoints/
+flamegraph.svg
+perf.data
diff --git a/AGENTS.md b/AGENTS.md
index 4ce5955..49b4371 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,4 +1,31 @@
 # Agent Instructions
 
-- use `cargo fmt`, `cargo clippy` and `cargo test`
-- only commit when all these checks are passing
+## Development Workflow
+
+* This is a Rust project, ALWAYS use `cargo fmt`, `cargo clippy` and `cargo test` and fix issues.
+* When the code is formatted, linted and successfully tested, ALWAYS commit your changes.
+
+## Agent Behavior
+
+* ALWAYS ask if there is any ambiguity in the request or multiple viable solutions.
+* NEVER guess, think through issues, read docs and existing code, discuss with the user.
+* NEVER assume anything if you are not confident or need more details.
+* If documentation is missing, try installing it or ask the user for help.
+
+## Code Quality
+
+* Avoid data duplication: NEVER hardcode a numeric value or string in two places, define constants.
+* Avoid code duplication: NEVER write the same logic twice, factor out a common generic function.
+
+* Before writing any new non-trivial code, ALWAYS check whether similar functionality already exists.
+* Before writing any new non-trivial code, ALWAYS consider using existing popular third-party crates.
+* When actually writing new code, ALWAYS try to write it in a reusable and extensible way from the start.
+
+* ALWAYS strive towards generic, modular, maintainable code.
+* ALWAYS prioritize good architecture and clarity over performance unless told otherwise..
+* ALWAYS write testable code with easily mockable inputs.
+* ALWAYS write small functions that ideally fit on a single screen.
+* ALWAYS avoid deep nesting, use more helper functions instead.
+* ALWAYS ask for permission to let a function be untested.
+* ALWAYS plan out suitable tests for the intended design BEFORE implementation.
+* ALWAYS strive to test all code paths, covering edge cases and boundary input values.
diff --git a/Cargo.lock b/Cargo.lock
index bfd25fb..84a1ad2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,6 +1,15 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
-version = 3
+version = 4
+
+[[package]]
+name = "addr2line"
+version = "0.25.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b"
+dependencies = [
+ "gimli",
+]
 
 [[package]]
 name = "adler2"
@@ -8,6 +17,28 @@ version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
 
+[[package]]
+name = "ahash"
+version = "0.8.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
+dependencies = [
+ "cfg-if",
+ "getrandom 0.3.4",
+ "once_cell",
+ "version_check",
+ "zerocopy",
+]
+
+[[package]]
+name = "aligned-vec"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc890384c8602f339876ded803c97ad529f3842aba97f6392b3dba0dd171769b"
+dependencies = [
+ "equator",
+]
+
 [[package]]
 name = "android-tzdata"
 version = "0.1.1"
@@ -79,12 +110,33 @@ version = "0.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb"
 
+[[package]]
+name = "arrayvec"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
+
 [[package]]
 name = "autocfg"
 version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
 
+[[package]]
+name = "backtrace"
+version = "0.3.76"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb531853791a215d7c62a30daf0dde835f381ab5de4589cfe7c649d2cbe92bd6"
+dependencies = [
+ "addr2line",
+ "cfg-if",
+ "libc",
+ "miniz_oxide",
+ "object",
+ "rustc-demangle",
+ "windows-link",
+]
+
 [[package]]
 name = "bitflags"
 version = "1.3.2"
@@ -248,6 +300,15 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "cpp_demangle"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0667304c32ea56cb4cd6d2d7c0cfe9a2f8041229db8c033af7f8d69492429def"
+dependencies = [
+ "cfg-if",
+]
+
 [[package]]
 name = "crc32fast"
 version = "1.4.2"
@@ -313,6 +374,15 @@ version = "0.8.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
 
+[[package]]
+name = "debugid"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d"
+dependencies = [
+ "uuid",
+]
+
 [[package]]
 name = "dirs"
 version = "5.0.1"
@@ -361,6 +431,48 @@ version = "1.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
 
+[[package]]
+name = "equator"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc"
+dependencies = [
+ "equator-macro",
+]
+
+[[package]]
+name = "equator-macro"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "equivalent"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
+
+[[package]]
+name = "errno"
+version = "0.3.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
+dependencies = [
+ "libc",
+ "windows-sys 0.59.0",
+]
+
+[[package]]
+name = "fastrand"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6"
+
 [[package]]
 name = "fdeflate"
 version = "0.3.7"
@@ -370,6 +482,18 @@ dependencies = [
  "simd-adler32",
 ]
 
+[[package]]
+name = "findshlibs"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64"
+dependencies = [
+ "cc",
+ "lazy_static",
+ "libc",
+ "winapi",
+]
+
 [[package]]
 name = "flate2"
 version = "1.0.35"
@@ -460,6 +584,18 @@ dependencies = [
  "wasi",
 ]
 
+[[package]]
+name = "getrandom"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi",
+ "wasip2",
+]
+
 [[package]]
 name = "gif"
 version = "0.12.0"
@@ -470,6 +606,18 @@ dependencies = [
  "weezl",
 ]
 
+[[package]]
+name = "gimli"
+version = "0.32.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7"
+
+[[package]]
+name = "hashbrown"
+version = "0.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4f467dd6dccf739c208452f8014c75c18bb8301b050ad1cfb27153803edb0f51"
+
 [[package]]
 name = "heck"
 version = "0.5.0"
@@ -482,6 +630,12 @@ version = "0.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
 
+[[package]]
+name = "hermit-abi"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
+
 [[package]]
 name = "iana-time-zone"
 version = "0.1.61"
@@ -519,6 +673,45 @@ dependencies = [
  "png",
 ]
 
+[[package]]
+name = "indexmap"
+version = "2.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
+dependencies = [
+ "equivalent",
+ "hashbrown",
+]
+
+[[package]]
+name = "inferno"
+version = "0.11.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88"
+dependencies = [
+ "ahash",
+ "indexmap",
+ "is-terminal",
+ "itoa",
+ "log",
+ "num-format",
+ "once_cell",
+ "quick-xml",
+ "rgb",
+ "str_stack",
+]
+
+[[package]]
+name = "is-terminal"
+version = "0.4.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46"
+dependencies = [
+ "hermit-abi 0.5.2",
+ "libc",
+ "windows-sys 0.59.0",
+]
+
 [[package]]
 name = "is_terminal_polyfill"
 version = "1.70.1"
@@ -534,6 +727,12 @@ dependencies = [
  "either",
 ]
 
+[[package]]
+name = "itoa"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
+
 [[package]]
 name = "jpeg-decoder"
 version = "0.3.1"
@@ -582,12 +781,42 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "linux-raw-sys"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
+
+[[package]]
+name = "lock_api"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
+dependencies = [
+ "scopeguard",
+]
+
 [[package]]
 name = "log"
 version = "0.4.25"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f"
 
+[[package]]
+name = "memchr"
+version = "2.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
+
+[[package]]
+name = "memmap2"
+version = "0.9.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "miniz_oxide"
 version = "0.8.4"
@@ -598,6 +827,17 @@ dependencies = [
  "simd-adler32",
 ]
 
+[[package]]
+name = "nix"
+version = "0.26.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b"
+dependencies = [
+ "bitflags 1.3.2",
+ "cfg-if",
+ "libc",
+]
+
 [[package]]
 name = "num-bigint"
 version = "0.4.6"
@@ -617,6 +857,16 @@ dependencies = [
  "num-traits",
 ]
 
+[[package]]
+name = "num-format"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3"
+dependencies = [
+ "arrayvec",
+ "itoa",
+]
+
 [[package]]
 name = "num-integer"
 version = "0.1.46"
@@ -652,10 +902,19 @@ version = "1.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
 dependencies = [
- "hermit-abi",
+ "hermit-abi 0.3.9",
  "libc",
 ]
 
+[[package]]
+name = "object"
+version = "0.37.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "once_cell"
 version = "1.20.3"
@@ -759,6 +1018,28 @@ dependencies = [
  "miniz_oxide",
 ]
 
+[[package]]
+name = "pprof"
+version = "0.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "38a01da47675efa7673b032bf8efd8214f1917d89685e07e395ab125ea42b187"
+dependencies = [
+ "aligned-vec",
+ "backtrace",
+ "cfg-if",
+ "findshlibs",
+ "inferno",
+ "libc",
+ "log",
+ "nix",
+ "once_cell",
+ "smallvec",
+ "spin",
+ "symbolic-demangle",
+ "tempfile",
+ "thiserror 2.0.18",
+]
+
 [[package]]
 name = "proc-macro2"
 version = "1.0.93"
@@ -768,6 +1049,15 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "quick-xml"
+version = "0.26.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "quote"
 version = "1.0.38"
@@ -777,17 +1067,44 @@ dependencies = [
  "proc-macro2",
 ]
 
+[[package]]
+name = "r-efi"
+version = "5.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+
 [[package]]
 name = "redox_users"
 version = "0.4.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43"
 dependencies = [
- "getrandom",
+ "getrandom 0.2.15",
  "libredox",
- "thiserror",
+ "thiserror 1.0.69",
 ]
 
+[[package]]
+name = "rgb"
+version = "0.8.53"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47b34b781b31e5d73e9fbc8689c70551fd1ade9a19e3e28cfec8580a79290cc4"
+dependencies = [
+ "bytemuck",
+]
+
+[[package]]
+name = "rustc-demangle"
+version = "0.1.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d"
+
+[[package]]
+name = "rustc-hash"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
+
 [[package]]
 name = "rustc_version"
 version = "0.4.1"
@@ -797,6 +1114,19 @@ dependencies = [
  "semver",
 ]
 
+[[package]]
+name = "rustix"
+version = "1.0.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11181fbabf243db407ef8df94a6ce0b2f9a733bd8be4ad02b4eda9602296cac8"
+dependencies = [
+ "bitflags 2.8.0",
+ "errno",
+ "libc",
+ "linux-raw-sys",
+ "windows-sys 0.59.0",
+]
+
 [[package]]
 name = "rustversion"
 version = "1.0.19"
@@ -812,6 +1142,12 @@ dependencies = [
  "winapi-util",
 ]
 
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
 [[package]]
 name = "semver"
 version = "1.0.25"
@@ -830,12 +1166,62 @@ version = "0.3.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
 
+[[package]]
+name = "smallvec"
+version = "1.15.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
+
+[[package]]
+name = "spin"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d5fe4ccb98d9c292d56fec89a5e07da7fc4cf0dc11e156b41793132775d3e591"
+dependencies = [
+ "lock_api",
+]
+
+[[package]]
+name = "stable_deref_trait"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
+
+[[package]]
+name = "str_stack"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb"
+
 [[package]]
 name = "strsim"
 version = "0.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
 
+[[package]]
+name = "symbolic-common"
+version = "12.18.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b1f3cdeaae6779ecba2567f20bf7716718b8c4ce6717c9def4ced18786bb11ea"
+dependencies = [
+ "debugid",
+ "memmap2",
+ "stable_deref_trait",
+ "uuid",
+]
+
+[[package]]
+name = "symbolic-demangle"
+version = "12.18.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "672c6ad9cb8fce6a1283cc9df9070073cccad00ae241b80e3686328a64e3523b"
+dependencies = [
+ "cpp_demangle",
+ "rustc-demangle",
+ "symbolic-common",
+]
+
 [[package]]
 name = "syn"
 version = "2.0.98"
@@ -847,13 +1233,35 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "tempfile"
+version = "3.23.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16"
+dependencies = [
+ "fastrand",
+ "getrandom 0.3.4",
+ "once_cell",
+ "rustix",
+ "windows-sys 0.59.0",
+]
+
 [[package]]
 name = "thiserror"
 version = "1.0.69"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
 dependencies = [
- "thiserror-impl",
+ "thiserror-impl 1.0.69",
+]
+
+[[package]]
+name = "thiserror"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
+dependencies = [
+ "thiserror-impl 2.0.18",
 ]
 
 [[package]]
@@ -867,6 +1275,17 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "thiserror-impl"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "tilezz"
 version = "0.0.3"
@@ -882,6 +1301,8 @@ dependencies = [
  "num_cpus",
  "paste",
  "plotters",
+ "pprof",
+ "rustc-hash",
 ]
 
 [[package]]
@@ -902,6 +1323,22 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
 
+[[package]]
+name = "uuid"
+version = "1.23.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "version_check"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
+
 [[package]]
 name = "walkdir"
 version = "2.5.0"
@@ -918,6 +1355,15 @@ version = "0.11.0+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
 
+[[package]]
+name = "wasip2"
+version = "1.0.3+wasi-0.2.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6"
+dependencies = [
+ "wit-bindgen",
+]
+
 [[package]]
 name = "wasm-bindgen"
 version = "0.2.100"
@@ -1032,6 +1478,12 @@ dependencies = [
  "windows-targets 0.52.6",
 ]
 
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
 [[package]]
 name = "windows-sys"
 version = "0.48.0"
@@ -1180,6 +1632,12 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "wit-bindgen"
+version = "0.57.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e"
+
 [[package]]
 name = "yeslogic-fontconfig-sys"
 version = "6.0.0"
@@ -1190,3 +1648,23 @@ dependencies = [
  "once_cell",
  "pkg-config",
 ]
+
+[[package]]
+name = "zerocopy"
+version = "0.8.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
diff --git a/Cargo.toml b/Cargo.toml
index e1935cf..6275918 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -27,8 +27,12 @@ clap = { version = "4.5.27", features = ["derive"], optional = true }
 num_cpus = { version = "1.16.0", optional = true }
 crossbeam = { version = "0.8.4", optional = true }
 itertools = "0.14.0"
+rustc-hash = "2.1.2"
+pprof = { version = "0.15.0", features = ["flamegraph"], optional = true }
 
 [features]
 default = ["plotters", "examples"]
 plotters = ["dep:plotters"]
 examples = ["plotters", "dep:clap", "dep:crossbeam", "dep:num_cpus"]
+pprof = ["dep:pprof"]
+cyclotomic_intersect = []
diff --git a/docs/sign-only-geometry-refactor.md b/docs/sign-only-geometry-refactor.md
new file mode 100644
index 0000000..4dcc380
--- /dev/null
+++ b/docs/sign-only-geometry-refactor.md
@@ -0,0 +1,322 @@
+# Sign-Only Geometry Refactor Plan
+
+## Background
+
+The Redelmeier patch enumeration algorithm requires segment-crossing checks during growth.
+Two implementations exist:
+
+1. **Pos4/SR fast path** — uses hand-coded `[i32; 4]` positions with `(i64, i64)` SR type
+   representing `a + b*sqrt(3)` with no denominator. Pure integer arithmetic.
+2. **Generic cyclotomic path** — uses `geometry::intersect<ZZ12>` operating on `ZZ::Real = Z12`
+   which stores `Ratio<i64>` coefficients. Generic but ~46x slower.
+
+### Profile Comparison (hexagon patches, size 8)
+
+**Pos4 path: 290ms**
+
+| %    | Function                                    |
+|------|---------------------------------------------|
+| 54.2 | `check_new_segments_cross_existing_pos4`    |
+| 9.1  | `lex_min_rot`                               |
+| 5.1  | Vec allocation                              |
+| 2.8  | HashMap insert                              |
+
+**Cyclotomic path: 13.5s**
+
+| %    | Function                                    |
+|------|---------------------------------------------|
+| 6.1  | `Ratio::reduce` (GCD normalization)         |
+| 5.8  | `zz_partial_signum_2_sym`                   |
+| 5.1  | `zz6_mul` (Ratio multiply)                  |
+| 4.6  | `Ratio::from_f64`                           |
+| 3.9  | `Ratio::add`                                |
+| 3.9  | `Ratio::mul`                                |
+| 3.7  | `check_segments_cross_cyclotomic`           |
+| 3.6  | `geometry::intersect`                       |
+| 3.6  | `Ratio::sub`                                |
+| 2.7  | `linalg::wedge`                             |
+| 2.0  | `linalg::is_ccw`                            |
+
+The top 7 hotspots are all `Ratio<i64>` operations. Together they account for ~31% of
+total samples, and they propagate through every downstream function (`wedge`, `is_ccw`,
+`intersect`, `signum`).
+
+## Why Scaled i64 Coefficients Don't Work for General Ring Types
+
+Z12 values have the form `(a + b*sqrt(3)) / l` where `l = scaling_fac = 2`.
+
+The coefficients stored in `Z12` are numerators with implicit denominator 2.
+When you multiply two such values:
+
+```
+(a + b*sqrt(3))/2  *  (c + d*sqrt(3))/2  =  (ac + 3bd + (ad+bc)*sqrt(3)) / 4
+```
+
+The denominator **doubles** from 2 to 4. Crucially, the numerator isn't always divisible
+by the scaling factor. Counterexample:
+
+```
+sqrt(3)/2 * sqrt(3)/2 = 3/4
+```
+
+Here `zz6_mul` produces numerator `3` for the first coefficient — not divisible by `l=2`.
+So you cannot normalize back to denominator 2 with integer-only coefficients.
+The denominator must grow, which is exactly what `Ratio<i64>` handles.
+
+**This is why the manual `GaussInt<i64>`-based ZZ12 did not help**: even though `ZZ12`
+itself used integer coefficients, `ZZ12::Real = Z12` is **always** the macro-generated
+`GaussInt<Ratio<i64>>` type. The entire geometry pipeline (`wedge` -> `dot` -> `is_ccw`
+-> `is_between` -> `signum`) operates on `Z12`, not `ZZ12`. So Ratio overhead was
+unavoidable.
+
+## Key Insight: Sign-Only Geometry
+
+The geometry pipeline only needs **signs** of Real values:
+
+- `is_ccw` checks `wedge().is_positive()`
+- `is_between` checks `wedge().is_zero() && dot().is_positive()`
+- `is_colinear` checks `(a*re + b*im + c).is_zero()`
+
+Since all denominators in the ring representation are positive,
+**`sign(numerator/denom) = sign(numerator)`**.
+
+This means we can work with **raw i64 numerators** and skip Ratio entirely.
+No division, no GCD, no normalization.
+
+The multiplication functions (`zz6_mul`, `zz8_mul`, `zz12_mul`, etc.) are **already
+generic** over `T: IntRing + FromPrimitive` — they work with `i64` directly.
+
+## Ring Classification
+
+### Category A: Integer root squares — exact integer sign
+
+| Ring  | Root squares           | # roots | Sign function          | `signum_sum_sqrt_expr` works? |
+|-------|------------------------|---------|------------------------|-------------------------------|
+| ZZ4   | `[1]`                  | 1       | `signum_1_sym`         | trivial (sign of single int)  |
+| ZZ6   | `[1, 3]`               | 2       | `signum_2_sym`         | yes: `sign(a + b*sqrt(3))`    |
+| ZZ8   | `[1, 2]`               | 2       | `signum_2_sym`         | yes: `sign(a + b*sqrt(2))`    |
+| ZZ12  | `[1, 3]` (same as ZZ6) | 2       | `signum_2_sym`         | yes                           |
+| ZZ24  | `[1, 2, 3, 6]`         | 4       | `signum_4_sym`         | yes: k=1, l=m*n (2*3=6)       |
+
+For these rings, the existing `signum_sum_sqrt_expr_{2,4}` functions work directly with
+`i64` arguments. The sign computation is **exact** and uses only integer comparisons
+(with squaring to avoid irrationals).
+
+### Category B: Nested radical roots — f64 sign, but still no Ratio
+
+| Ring  | Root squares include              | # roots | Current sign function | Proposed strategy              |
+|-------|-----------------------------------|---------|-----------------------|--------------------------------|
+| ZZ10  | `2(5-sqrt(5))`                    | 4       | f64 fallback          | i64 mul -> f64 sign            |
+| ZZ16  | `2+sqrt(2)`, `2(2+sqrt(2))`       | 4       | `signum_4_sym`*       | i64 mul -> f64 sign            |
+| ZZ20  | same as ZZ10                      | 4       | f64 fallback          | i64 mul -> f64 sign            |
+| ZZ30  | `2(5-sqrt(5))`, etc.              | 8       | f64 fallback          | i64 mul -> f64 sign            |
+| ZZ32  | `2+sqrt(2+sqrt(2))`, etc.         | 8       | f64 fallback          | i64 mul -> f64 sign            |
+| ZZ60  | all of the above                  | 8       | f64 fallback          | i64 mul -> f64 sign            |
+
+*Note: ZZ16 currently uses `signum_4_sym` with `Ratio<i64>` approximations of irrational
+root squares (e.g., `from_f64(2+sqrt(2))`). This is **already inexact** — the Ratio
+is an approximation of an irrational. Switching to f64 sign from i64 coefficients would
+be equally accurate and simpler.
+
+For Category B, the sign check converts i64 coefficients -> f64 -> signum. This is
+identical in accuracy to the current approach but avoids all Ratio overhead.
+
+### Summary
+
+| Category | Ratio overhead eliminated | Sign accuracy     |
+|----------|--------------------------|-------------------|
+| A        | ALL (mul + sign)         | exact             |
+| B        | ALL (mul only, sign f64) | same as current   |
+
+## How Pos4/SR Already Does This
+
+The Pos4/SR approach is a working proof of concept for ZZ12:
+
+- `SR(i64, i64)` represents `a + b*sqrt(3)` with no denominator
+- Multiplication: `SR(a*c + 3*b*d, a*d + b*c)` — stays in integers
+- Sign check (`sign_sqrt3`): compares `a^2` vs `3*b^2` via squaring — avoids irrationals
+- `Pos4 = [i32; 4]` encodes scaled real/imaginary parts directly
+
+This is exactly the integer-numerator approach, but hardcoded for ZZ12 only.
+
+## Proposed Design
+
+### Trait for sign-only real arithmetic
+
+```rust
+/// Integer-only representation of a cyclotomic real value for sign-only operations.
+/// Stores raw i64 numerators without the common denominator.
+trait SignReal: Clone + Copy + Sized {
+    fn zero() -> Self;
+    fn add(&self, other: &Self) -> Self;
+    fn sub(&self, other: &Self) -> Self;
+    fn mul(&self, other: &Self) -> Self;
+    fn neg(&self) -> Self;
+    fn is_zero(&self) -> bool;
+    fn is_positive(&self) -> bool;
+}
+```
+
+### Generic geometry functions
+
+Rewrite `wedge`, `dot`, `is_ccw`, `is_between`, `intersect` to be generic over `SignReal`:
+
+```rust
+fn wedge_fast<R: SignReal>(re: impl Fn(&Pos) -> R, im: impl Fn(&Pos) -> R,
+                            p1: &Pos, p2: &Pos) -> R {
+    re(p1).mul(&im(p2)).sub(&im(p1).mul(&re(p2)))
+}
+
+fn is_ccw_fast<R: SignReal>(re: impl Fn(&Pos) -> R, im: impl Fn(&Pos) -> R,
+                             p: &Pos, a: &Pos, b: &Pos) -> bool {
+    let v = sub_pos(a, p);
+    let w = sub_pos(b, p);
+    wedge_fast(re, im, &v, &w).is_positive()
+}
+
+fn intersect_fast<R: SignReal>(re: impl Fn(&Pos) -> R, im: impl Fn(&Pos) -> R,
+                                a: &Pos, b: &Pos, c: &Pos, d: &Pos) -> bool {
+    // same logic as geometry::intersect, using SignReal operations
+}
+```
+
+### Per-ring SignReal implementations
+
+Each ring implements `SignReal` on a fixed-size i64 array:
+
+```rust
+// ZZ12: 2 roots, i64 coefficients for a + b*sqrt(3)
+#[derive(Clone, Copy)]
+struct Z12Sign([i64; 2]);
+
+impl SignReal for Z12Sign {
+    fn mul(&self, other: &Self) -> Self {
+        let [a, b] = self.0;
+        let [c, d] = other.0;
+        Z12Sign([a*c + 3*b*d, a*d + b*c])  // zz6_mul::<i64>()
+    }
+
+    fn is_positive(&self) -> bool {
+        let [a, b] = self.0;
+        signum_sum_sqrt_expr_2(a, 1i64, b, 3i64) > 0
+    }
+    // ...
+}
+
+// ZZ24: 4 roots, i64 coefficients for a + b*sqrt(2) + c*sqrt(3) + d*sqrt(6)
+#[derive(Clone, Copy)]
+struct Z24Sign([i64; 4]);
+
+impl SignReal for Z24Sign {
+    fn mul(&self, other: &Self) -> Self {
+        // use zz24_mul::<i64>()
+    }
+    fn is_positive(&self) -> bool {
+        // use signum_sum_sqrt_expr_4 with integer root squares
+    }
+}
+
+// ZZ16: 4 roots with nested radicals -> f64 sign fallback
+#[derive(Clone, Copy)]
+struct Z16Sign([i64; 4]);
+
+impl SignReal for Z16Sign {
+    fn is_positive(&self) -> bool {
+        // compute value as f64 from i64 coefficients and root values
+        let val: f64 = self.0.iter().zip(ROOT_VALUES).map(|(c, r)| *c as f64 * r).sum();
+        val > 0.0
+    }
+}
+```
+
+### Position storage
+
+The growing algorithm builds positions by starting at origin and adding unit vectors.
+Unit coefficients are already integers (from `ccw_unit_coeffs`). Track positions as
+i64 arrays to avoid Ratio entirely:
+
+```rust
+// Generic position for any ring with N symbolic roots
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+struct IntPos<const N: usize> {
+    // [re_0, re_1, ..., re_{N-1}, im_0, im_1, ..., im_{N-1}]
+    coeffs: [i64; 2 * N],
+}
+
+impl<const N: usize> IntPos<N> {
+    fn add_unit(&mut self, direction: i8, unit_coeffs: &[[i64; 2]]) {
+        for i in 0..N {
+            self.coeffs[i] += unit_coeffs[direction as usize][0]; // real part
+            self.coeffs[N + i] += unit_coeffs[direction as usize][1]; // imag part
+        }
+    }
+}
+```
+
+### Integration with existing code
+
+The `SignReal` implementations and generic geometry functions would live in a new module
+(e.g., `src/cyclotomic/sign_geom.rs`). The growing algorithm would use `intersect_fast`
+instead of `geometry::intersect` when the feature is enabled.
+
+The existing generic `geometry::intersect` would remain unchanged for non-performance-
+critical use cases (tests, one-off computations, etc.).
+
+## What This Eliminates
+
+| Hotspot (current profile)    | After refactor                     |
+|------------------------------|------------------------------------|
+| `Ratio::reduce` (6.1%)      | eliminated - i64 arithmetic        |
+| `Ratio::from_f64` (4.6%)    | eliminated - constants are i64     |
+| `Ratio::add/mul/sub` (~11%) | eliminated - i64 ops               |
+| `Ratio::cmp` (1.5%)         | eliminated - no Ord needed         |
+| `zz_partial_signum_2_sym`    | much faster - direct i64 sign      |
+| `zz6_mul` (Ratio multiply)  | much faster - i64 multiply         |
+
+Expected result: near-Pos4 performance for Category A rings, significant improvement
+for Category B rings.
+
+## Overflow Considerations
+
+i64 overflow must be considered for large patch sizes or rings with many roots.
+
+**ZZ12 estimation** (scaling_fac = 2, 2 roots):
+- Initial numerators: ~2 (from unit coefficients)
+- After k multiplications: ~O(k^2) growth (each mul adds products of 2 terms)
+- For size-8 patches (~7 multiplications in path): numerators ~O(100)
+- Wedge/dot products involve 2 multiplications: ~O(10,000)
+- Well within i64 range (9.2e18)
+
+**ZZ60 estimation** (scaling_fac = 16, 8 roots):
+- Much faster growth due to 8-root multiplication formula
+- May need i128 for safety, or early rescaling
+- Should benchmark to determine practical limits
+
+Mitigation options:
+- Use `i128` for safety (2x memory but still no Ratio/GCD)
+- Add overflow checks with `checked_mul` in debug builds
+- Limit patch sizes per ring based on empirical overflow testing
+
+## Implementation Steps
+
+1. **Create `SignReal` trait and generic geometry functions** in `sign_geom.rs`
+2. **Implement `SignReal` for Category A rings** (ZZ4, ZZ6, ZZ8, ZZ12, ZZ24)
+   using exact integer sign functions
+3. **Implement `SignReal` for Category B rings** (ZZ10, ZZ16, ZZ20, ZZ30, ZZ32, ZZ60)
+   using f64 sign fallback from i64 coefficients
+4. **Create `IntPos<N>` generic position type** with unit-based construction
+5. **Integrate with growing algorithm** — use `intersect_fast` + `IntPos` instead of
+   `geometry::intersect` + Ratio-based positions
+6. **Benchmark all ring types** and compare against current implementation
+7. **Remove Pos4/SR specialized code** once generic path is fast enough
+
+## Open Design Questions
+
+- **Position type in the growing algorithm**: Should `IntPos<N>` replace the current
+  `ZZ12` positions entirely, or should it be a parallel representation used only for
+  geometry?
+- **Macro vs manual implementation**: Should `SignReal` implementations be generated by
+  macro (like `impl_symnum!`) or written manually (only ~10 types)?
+- **Long-term refactor**: Should the entire ring type hierarchy be made parametric over
+  the scalar type (`Ratio<i64>` vs `i64`)? This would be the cleanest design but is a
+  massive refactor touching `symnum.rs`, `types.rs`, and all downstream code.
diff --git a/src/bin/patch_bench.rs b/src/bin/patch_bench.rs
new file mode 100644
index 0000000..1756f5e
--- /dev/null
+++ b/src/bin/patch_bench.rs
@@ -0,0 +1,99 @@
+use std::collections::BTreeMap;
+use std::time::Instant;
+
+use clap::Parser;
+use rustc_hash::FxHashSet;
+use tilezz::cyclotomic::ZZ12;
+use tilezz::intgeom::growing::{grow_redelmeier, grow_redelmeier_free};
+use tilezz::intgeom::rat::Rat;
+use tilezz::intgeom::snake::Snake;
+use tilezz::intgeom::tiles;
+
+#[cfg(feature = "pprof")]
+use pprof::ProfilerGuardBuilder;
+
+#[derive(Parser)]
+#[command(
+    name = "patch_bench",
+    about = "Benchmark Redelmeier patch enumeration - ZZ12 only"
+)]
+struct Args {
+    #[arg(long, default_value_t = 9)]
+    max_size: usize,
+
+    #[arg(long, value_enum, default_value = "hexagon")]
+    tile: TileShape,
+
+    #[arg(long, default_value_t = false)]
+    free: bool,
+}
+
+#[derive(Clone, Debug, clap::ValueEnum)]
+enum TileShape {
+    Hexagon,
+    Square,
+    Triangle,
+    Spectre,
+}
+
+fn make_seed(shape: &TileShape) -> Rat<ZZ12> {
+    let snake: Snake<ZZ12> = match shape {
+        TileShape::Hexagon => tiles::hexagon(),
+        TileShape::Square => tiles::square(),
+        TileShape::Triangle => tiles::triangle(),
+        TileShape::Spectre => tiles::spectre(),
+    };
+    Rat::from_unchecked(&snake)
+}
+
+fn run_redelmeier(
+    seed: Rat<ZZ12>,
+    max_size: usize,
+    free: bool,
+) -> BTreeMap<usize, FxHashSet<Rat<ZZ12>>> {
+    if free {
+        grow_redelmeier_free(&seed, max_size)
+    } else {
+        grow_redelmeier(&seed, max_size)
+    }
+}
+
+fn main() {
+    let args = Args::parse();
+
+    #[cfg(feature = "pprof")]
+    let guard = ProfilerGuardBuilder::default()
+        .frequency(1000)
+        .blocklist(&["libc", "libgcc", "pthread", "vdso"])
+        .build()
+        .unwrap();
+
+    let seed = make_seed(&args.tile);
+
+    let full_label = format!("{:?} [ZZ12]", args.tile);
+    let label_free = if args.free { " (free)" } else { "" };
+
+    let t0 = Instant::now();
+    let final_results = run_redelmeier(seed, args.max_size, args.free);
+    let elapsed = t0.elapsed();
+
+    println!("=== Redelmeier - {}{} ===", full_label, label_free);
+    let mut total = 0usize;
+    for k in 1..=args.max_size {
+        let count = final_results.get(&k).map(|s| s.len()).unwrap_or(0);
+        println!("  size {:>3}: {:>6} patches", k, count);
+        total += count;
+    }
+    println!("  total: {:>6} patches", total);
+    println!("  time:  {:.2?}", elapsed);
+
+    #[cfg(feature = "pprof")]
+    {
+        if let Ok(report) = guard.report().build() {
+            let flamegraph_file = "flamegraph.svg";
+            let mut file = std::fs::File::create(flamegraph_file).unwrap();
+            report.flamegraph(&mut file).unwrap();
+            eprintln!("\nFlame graph written to {}", flamegraph_file);
+        }
+    }
+}
diff --git a/src/bin/patch_growth.rs b/src/bin/patch_growth.rs
new file mode 100644
index 0000000..85b531d
--- /dev/null
+++ b/src/bin/patch_growth.rs
@@ -0,0 +1,249 @@
+use std::collections::{BTreeMap, BTreeSet};
+use std::time::Instant;
+
+use clap::Parser;
+
+use tilezz::cyclotomic::{IsComplex, IsRingOrField, Units, ZZ12, ZZ4};
+use tilezz::intgeom::rat::Rat;
+use tilezz::intgeom::snake::Snake;
+use tilezz::intgeom::tiles;
+use tilezz::intgeom::tileset::{GlueStats, TileSet};
+
+#[derive(Parser)]
+#[command(
+    name = "patch_growth",
+    about = "Enumerate tile patches by incremental growth"
+)]
+struct Args {
+    /// Seed tile shape
+    #[arg(long, value_enum, default_value = "hexagon")]
+    tile: TileShape,
+
+    /// Maximum patch size (number of tiles)
+    #[arg(long, default_value_t = 6)]
+    max_size: usize,
+
+    /// Cyclotomic ring (ZZ4 only supports square tiles)
+    #[arg(long, value_enum, default_value = "zz12")]
+    ring: RingChoice,
+
+    /// Print per-pair glue statistics
+    #[arg(long)]
+    verbose: bool,
+
+    /// Validate results against brute force (caps max_size at 4)
+    #[arg(long)]
+    validate: bool,
+}
+
+#[derive(Clone, Debug, clap::ValueEnum)]
+enum TileShape {
+    Hexagon,
+    Square,
+    Triangle,
+    Spectre,
+}
+
+#[derive(Clone, Debug, clap::ValueEnum)]
+enum RingChoice {
+    ZZ4,
+    ZZ12,
+}
+
+fn seed_tile_zz4(shape: &TileShape) -> Rat<ZZ4> {
+    match shape {
+        TileShape::Square => Rat::from_unchecked(&tiles::square::<ZZ4>()),
+        _ => {
+            eprintln!(
+                "error: {:?} tile not supported with --ring zz4 (only square)",
+                shape
+            );
+            std::process::exit(1);
+        }
+    }
+}
+
+fn seed_tile_zz12(shape: &TileShape) -> Rat<ZZ12> {
+    let snake: Snake<ZZ12> = match shape {
+        TileShape::Hexagon => tiles::hexagon(),
+        TileShape::Square => tiles::square(),
+        TileShape::Triangle => tiles::triangle(),
+        TileShape::Spectre => tiles::spectre(),
+    };
+    Rat::from_unchecked(&snake)
+}
+
+// --- TileSet-based matching ---
+
+fn tileset_match<T: IsComplex + IsRingOrField + Units>(
+    patches: &[Rat<T>],
+    seed: &[Rat<T>],
+    verbose: bool,
+    validate: bool,
+) -> (BTreeSet<Rat<T>>, GlueStats) {
+    if patches.is_empty() || seed.is_empty() {
+        return (BTreeSet::new(), GlueStats::default());
+    }
+    let count_a = patches.len();
+    let count_b = seed.len();
+
+    let mut all_tiles: Vec<Rat<T>> = patches.to_vec();
+    all_tiles.extend(seed.iter().cloned());
+
+    let ts = TileSet::new(all_tiles);
+
+    let pairs: Vec<(usize, usize)> = (0..count_a)
+        .flat_map(|i| (count_a..count_a + count_b).map(move |j| (i, j)))
+        .collect();
+
+    let (results, stats) = if validate {
+        let t_fast = Instant::now();
+        let (fast, fast_stats) = ts.valid_rats_for_pairs(&pairs);
+        let dt_fast = t_fast.elapsed();
+
+        let mut bf_results = BTreeSet::new();
+        let mut bf_ops = 0;
+        let t_bf = Instant::now();
+        for a in patches {
+            for b in seed {
+                for ia in 0..a.len() {
+                    for ib in 0..b.len() {
+                        bf_ops += 1;
+                        if let Ok(glued) = a.try_glue((ia as i64, ib as i64), b) {
+                            bf_results.insert(glued);
+                        }
+                    }
+                }
+            }
+        }
+        let dt_bf = t_bf.elapsed();
+
+        if fast != bf_results {
+            eprintln!("MISMATCH!");
+            for r in fast.difference(&bf_results).take(5) {
+                eprintln!("  tileset extra: {:?}", r.seq());
+            }
+            for r in bf_results.difference(&fast).take(5) {
+                eprintln!("  brute force extra: {:?}", r.seq());
+            }
+            std::process::exit(1);
+        }
+        let speedup = dt_bf.as_secs_f64() / dt_fast.as_secs_f64().max(1e-9);
+        eprintln!(
+            "    validate: OK | tileset: {:.2?} ({} seqs) | brute_force: {:.2?} ({} ops) | {:.1}x speedup",
+            dt_fast, fast_stats.unique_sequences, dt_bf, bf_ops, speedup,
+        );
+        (fast, fast_stats)
+    } else {
+        ts.valid_rats_for_pairs(&pairs)
+    };
+
+    if verbose {
+        eprintln!(
+            "    tileset: {} x {} tiles, {} distinct",
+            count_a,
+            count_b,
+            results.len(),
+        );
+        eprintln!("    {stats}");
+    }
+
+    (results, stats)
+}
+
+fn run<T: IsComplex + IsRingOrField + Units>(
+    seed: Rat<T>,
+    max_size: usize,
+    verbose: bool,
+    validate: bool,
+    ring_label: &str,
+    tile_label: &str,
+) {
+    eprintln!(
+        "Seed: {}, edges: {}, ring: {}, max_size: {}, validate: {}",
+        tile_label,
+        seed.len(),
+        ring_label,
+        max_size,
+        validate,
+    );
+
+    let mut patches: BTreeMap<usize, Vec<Rat<T>>> = BTreeMap::new();
+    let mut all_stats: BTreeMap<usize, GlueStats> = BTreeMap::new();
+    let seed_vec = vec![seed];
+    patches.insert(1, seed_vec.clone());
+
+    for k in 2..=max_size {
+        let t0 = Instant::now();
+        let (results, stats) = tileset_match::<T>(&patches[&(k - 1)], &seed_vec, verbose, validate);
+        let elapsed = t0.elapsed();
+
+        eprintln!(
+            "  size {:>3} = {:>3} + {:>3}: {} distinct patches [{:.2?}]",
+            k,
+            k - 1,
+            1,
+            results.len(),
+            elapsed,
+        );
+        eprintln!("    {stats}");
+
+        all_stats.insert(k, stats);
+        patches.insert(k, results.into_iter().collect());
+    }
+
+    eprintln!("\n--- Summary ---");
+    let mut total = 0usize;
+    for (&size, pats) in &patches {
+        eprintln!("  size {:>3}: {:>6} patches", size, pats.len());
+        total += pats.len();
+    }
+    eprintln!("  total:   {total:>6} patches");
+
+    let mut aggregate = GlueStats::default();
+    for s in all_stats.values() {
+        aggregate += s.clone();
+    }
+    eprintln!("\n--- Aggregate Stats ---");
+    eprintln!("  {aggregate}");
+}
+
+fn main() {
+    let args = Args::parse();
+
+    if args.max_size < 1 {
+        eprintln!("max_size must be at least 1");
+        std::process::exit(1);
+    }
+    if args.validate && args.max_size > 4 {
+        eprintln!("note: --validate with max_size > 4 may be slow (brute force scales poorly)");
+    }
+
+    let tile_label = format!("{:?}", args.tile);
+    let ring_label = format!("{:?}", args.ring).to_lowercase();
+
+    match args.ring {
+        RingChoice::ZZ4 => {
+            let seed = seed_tile_zz4(&args.tile);
+            run::<ZZ4>(
+                seed,
+                args.max_size,
+                args.verbose,
+                args.validate,
+                &ring_label,
+                &tile_label,
+            );
+        }
+        RingChoice::ZZ12 => {
+            let seed = seed_tile_zz12(&args.tile);
+            run::<ZZ12>(
+                seed,
+                args.max_size,
+                args.verbose,
+                args.validate,
+                &ring_label,
+                &tile_label,
+            );
+        }
+    }
+}
diff --git a/src/bin/polyomino.rs b/src/bin/polyomino.rs
new file mode 100644
index 0000000..c808c0a
--- /dev/null
+++ b/src/bin/polyomino.rs
@@ -0,0 +1,709 @@
+use std::collections::BTreeMap;
+use std::time::Instant;
+
+use rustc_hash::FxHashSet;
+use std::collections::BTreeSet;
+
+struct ProfileStats {
+    enumerate_ns: u64,
+    apply_ns: u64,
+    enumerate_calls: usize,
+}
+
+use clap::Parser;
+use tilezz::intgeom::angles::normalize_angle;
+use tilezz::intgeom::rat::{lex_min_rot, Rat};
+use tilezz::intgeom::tiles;
+
+const HTURN: i8 = 2;
+const TURN: i32 = 4;
+
+#[derive(Parser)]
+#[command(
+    name = "polyomino",
+    about = "Specialized free hole-free polyomino enumerator"
+)]
+struct Args {
+    #[arg(long, default_value_t = 12)]
+    max_size: usize,
+
+    #[arg(long, default_value_t = false)]
+    profile: bool,
+}
+
+struct Site {
+    counter: usize,
+    ns: usize,
+    ml: usize,
+    ne: usize,
+}
+
+struct Patch {
+    angles: Vec<i8>,
+    counters: Vec<usize>,
+    next_counter: usize,
+    positions: Vec<(i32, i32)>,
+    visited: FxHashSet<(i32, i32)>,
+    rat: Rat<tilezz::cyclotomic::ZZ4>,
+}
+
+fn dir_step(dir: i32) -> (i32, i32) {
+    match dir.rem_euclid(TURN) {
+        0 => (1, 0),
+        1 => (0, 1),
+        2 => (-1, 0),
+        _ => (0, -1),
+    }
+}
+
+fn trace_positions(start: (i32, i32), start_dir: i32, angles: &[i8]) -> Vec<(i32, i32)> {
+    let mut pos = vec![start];
+    let mut dir = start_dir;
+    for &a in angles {
+        dir = (dir + a as i32).rem_euclid(TURN);
+        let (dx, dy) = dir_step(dir);
+        let last = *pos.last().unwrap();
+        pos.push((last.0 + dx, last.1 + dy));
+    }
+    pos
+}
+
+impl Patch {
+    fn from_seed(seed: &Rat<tilezz::cyclotomic::ZZ4>) -> Self {
+        let canonical = seed.clone().canonical();
+        let seq = canonical.seq();
+        let n = seq.len();
+        let points = trace_positions((0, 0), 0, seq);
+        let positions = points[..n].to_vec();
+        let visited: FxHashSet<(i32, i32)> = points[..=n].iter().copied().collect();
+        let rat = Rat::from_slice_unchecked(seq);
+        Patch {
+            angles: seq.to_vec(),
+            counters: (0..n).collect(),
+            next_counter: n,
+            positions,
+            visited,
+            rat,
+        }
+    }
+
+    fn to_rat(&self) -> Rat<tilezz::cyclotomic::ZZ4> {
+        self.rat.clone()
+    }
+
+    fn enumerate_sites(&self, seed_seq: &[i8], min_counter: usize) -> Vec<Site> {
+        let n = self.angles.len();
+        let m = seed_seq.len();
+        let mut seen: BTreeSet<(usize, usize, usize)> = BTreeSet::new();
+        let mut sites: Vec<Site> = Vec::new();
+
+        for ia in 0..n {
+            if self.counters[ia] < min_counter {
+                continue;
+            }
+            for ib in 0..m {
+                let (ns, ml, ne) = self.compute_match(ia, seed_seq, ib);
+                if ml == 0 || !seen.insert((ns, ml, ne)) {
+                    continue;
+                }
+                if ml >= 2 {
+                    let gap_left =
+                        self.angles[(ns + ml) % n] as i32 + seed_seq[(ne + m - ml) % m] as i32;
+                    let gap_right = self.angles[ns] as i32 + seed_seq[ne] as i32;
+                    if gap_left < 0 || gap_right < 0 {
+                        continue;
+                    }
+                } else {
+                    let left = self.angles[ia] as i32 + seed_seq[ib] as i32;
+                    let right =
+                        self.angles[(ia + 1) % n] as i32 + seed_seq[(ib + m - 1) % m] as i32;
+                    if left <= 0 || right <= 0 {
+                        continue;
+                    }
+                }
+                sites.push(Site {
+                    counter: self.counters[ns],
+                    ns,
+                    ml,
+                    ne,
+                });
+            }
+        }
+
+        sites.sort_by_key(|s| s.counter);
+        sites
+    }
+
+    fn compute_match(&self, ia: usize, seed_seq: &[i8], ib: usize) -> (usize, usize, usize) {
+        let n = self.angles.len();
+        let m = seed_seq.len();
+        let x: Vec<i8> = (0..n).map(|i| self.angles[(ia + i) % n]).collect();
+        let seed_slice: Vec<i8> = (0..m).map(|i| seed_seq[(ib + 1 + i) % m]).collect();
+        let y: Vec<i8> = seed_slice.iter().rev().map(|a| -a).collect();
+
+        let min_len = x.len().min(y.len());
+        if min_len < 2 {
+            return (ia, 1, ib);
+        }
+
+        let mut len = min_len;
+        for i in 1..min_len {
+            if x[i] != y[i] {
+                len = i;
+                break;
+            }
+        }
+        if x[0] != y[0] {
+            return (
+                ((ia as i64).rem_euclid(n as i64)) as usize,
+                len,
+                ((ib as i64).rem_euclid(m as i64)) as usize,
+            );
+        }
+
+        let remaining = min_len - len;
+        for i in 1..remaining {
+            if x[x.len() - i] != y[y.len() - i] {
+                return (
+                    ((ia as i64 - i as i64).rem_euclid(n as i64)) as usize,
+                    len + i,
+                    ((ib as i64 + i as i64).rem_euclid(m as i64)) as usize,
+                );
+            }
+        }
+        (
+            ((ia as i64 - remaining as i64).rem_euclid(n as i64)) as usize,
+            len + remaining,
+            ((ib as i64 + remaining as i64).rem_euclid(m as i64)) as usize,
+        )
+    }
+
+    fn try_apply(&self, site: &Site, seed_seq: &[i8]) -> Option<Patch> {
+        let m = seed_seq.len();
+        let n = self.angles.len();
+        let ml = site.ml;
+        let ns = site.ns;
+        let ne = site.ne;
+
+        let x_len = n - ml + 1;
+        let y_len = m - ml + 1;
+
+        let x: Vec<i8> = (0..x_len).map(|i| self.angles[(ns + ml + i) % n]).collect();
+        let y: Vec<i8> = (0..y_len).map(|i| seed_seq[(ne + i) % m]).collect();
+
+        let mut new_angles: Vec<i8> = Vec::with_capacity(x_len + y_len - 2);
+        new_angles.extend_from_slice(&x[..x_len - 1]);
+        new_angles.extend_from_slice(&y[..y_len - 1]);
+
+        let a_yx = normalize_angle::<tilezz::cyclotomic::ZZ4>(x[0] + y[y_len - 1] - HTURN);
+        let a_xy = normalize_angle::<tilezz::cyclotomic::ZZ4>(y[0] + x[x_len - 1] - HTURN);
+        if a_yx.abs() == HTURN || a_xy.abs() == HTURN {
+            return None;
+        }
+        new_angles[0] = a_yx;
+        new_angles[x_len - 1] = a_xy;
+
+        let junction_a = self.positions[(ns + ml) % n];
+        let junction_b = self.positions[ns];
+
+        let prev_idx = (ns + n - 1) % n;
+        let in_dir_b = dir_between(self.positions[prev_idx], self.positions[ns]);
+
+        let out_dir_b = (in_dir_b + a_xy as i32).rem_euclid(TURN);
+        let mut trace_angles = vec![0i8];
+        trace_angles.extend_from_slice(&y[1..y_len - 1]);
+        let new_points = trace_positions(junction_b, out_dir_b, &trace_angles);
+
+        let num_new = new_points.len();
+        for &p in &new_points[1..num_new] {
+            if p != junction_a && self.visited.contains(&p) {
+                return None;
+            }
+        }
+
+        let mut new_positions: Vec<(i32, i32)> = Vec::with_capacity(new_angles.len());
+        for i in 0..(x_len - 1) {
+            new_positions.push(self.positions[(ns + ml + i) % n]);
+        }
+        for &p in &new_points[..num_new - 1] {
+            new_positions.push(p);
+        }
+
+        let mut new_visited = self.visited.clone();
+        for &p in &new_points[..num_new] {
+            new_visited.insert(p);
+        }
+
+        let mut new_counters: Vec<usize> = Vec::with_capacity(new_angles.len());
+        let mut nc = self.next_counter;
+        for i in 0..(x_len - 1) {
+            let old_pos = (ns + ml + i) % n;
+            new_counters.push(self.counters[old_pos]);
+        }
+        for _ in 0..(y_len - 1) {
+            new_counters.push(nc);
+            nc += 1;
+        }
+
+        let offset = lex_min_rot(&new_angles);
+        new_angles.rotate_left(offset);
+        new_counters.rotate_left(offset);
+        new_positions.rotate_left(offset);
+
+        let rat = Rat::from_canonical_angles_unchecked(new_angles.clone());
+
+        Some(Patch {
+            angles: new_angles,
+            counters: new_counters,
+            next_counter: nc,
+            positions: new_positions,
+            visited: new_visited,
+            rat,
+        })
+    }
+}
+
+fn dir_between(a: (i32, i32), b: (i32, i32)) -> i32 {
+    let dx = b.0 - a.0;
+    let dy = b.1 - a.1;
+    match (dx, dy) {
+        (1, 0) => 0,
+        (0, 1) => 1,
+        (-1, 0) => 2,
+        (0, -1) => 3,
+        _ => panic!("non-unit step from ({a:?}) to ({b:?})"),
+    }
+}
+
+#[allow(dead_code)]
+fn make_free(
+    _onesided: &FxHashSet<Rat<tilezz::cyclotomic::ZZ4>>,
+) -> FxHashSet<Rat<tilezz::cyclotomic::ZZ4>> {
+    // Note: this function is now dead code since we store free polyominoes directly
+    // Keeping it for API compatibility - would need the same transformation logic
+    FxHashSet::default()
+}
+
+fn main() {
+    let args = Args::parse();
+
+    #[cfg(feature = "pprof")]
+    let guard = pprof::ProfilerGuardBuilder::default()
+        .frequency(1000)
+        .blocklist(&["libc", "libgcc", "pthread", "vdso"])
+        .build()
+        .unwrap();
+
+    let seed: Rat<tilezz::cyclotomic::ZZ4> = Rat::from_unchecked(&tiles::square());
+    let seed_seq = seed.seq().to_vec();
+
+    let t0 = Instant::now();
+    let mut results: BTreeMap<usize, FxHashSet<Rat<tilezz::cyclotomic::ZZ4>>> = BTreeMap::new();
+    let initial = Patch::from_seed(&seed);
+    let initial_rat = initial.to_rat();
+    let initial_free = std::cmp::min(initial_rat.clone(), initial_rat.reflected());
+    results.entry(1).or_default().insert(initial_free);
+
+    let mut profile = ProfileStats {
+        enumerate_ns: 0,
+        apply_ns: 0,
+        enumerate_calls: 0,
+    };
+
+    grow_recursive(
+        &initial,
+        &seed_seq,
+        0,
+        1,
+        args.max_size,
+        &mut results,
+        &mut profile,
+    );
+    let elapsed = t0.elapsed();
+
+    // Results already in free form - just print them
+    println!("=== Free hole-free polyominoes (incremental) ===");
+    let mut total = 0;
+    for size in 1..=args.max_size {
+        let patches = results.get(&size);
+        println!(
+            "  size {size:>3}: {:>6} patches",
+            patches.map(|s| s.len()).unwrap_or(0)
+        );
+        total += patches.map(|s| s.len()).unwrap_or(0);
+    }
+    println!("  total:   {total:>6} patches");
+    println!("  time:    {elapsed:.2?}");
+
+    if args.profile {
+        eprintln!(
+            "\nProfile: {} enumerate calls, {:.2}s enumerate, {:.2}s apply",
+            profile.enumerate_calls,
+            profile.enumerate_ns as f64 / 1e9,
+            profile.apply_ns as f64 / 1e9,
+        );
+    }
+
+    #[cfg(feature = "pprof")]
+    {
+        if let Ok(report) = guard.report().build() {
+            let flamegraph_file = "flamegraph.svg";
+            let mut file = std::fs::File::create(flamegraph_file).unwrap();
+            report.flamegraph(&mut file).unwrap();
+            eprintln!("\nFlame graph written to {}", flamegraph_file);
+        }
+    }
+}
+
+fn grow_recursive(
+    patch: &Patch,
+    seed_seq: &[i8],
+    min_counter: usize,
+    current_size: usize,
+    max_size: usize,
+    results: &mut BTreeMap<usize, FxHashSet<Rat<tilezz::cyclotomic::ZZ4>>>,
+    profile: &mut ProfileStats,
+) {
+    if current_size >= max_size {
+        return;
+    }
+
+    let t0 = Instant::now();
+    profile.enumerate_calls += 1;
+    let sites = patch.enumerate_sites(seed_seq, min_counter);
+    profile.enumerate_ns += t0.elapsed().as_nanos() as u64;
+
+    for site in &sites {
+        let t0 = Instant::now();
+        let new_patch = match patch.try_apply(site, seed_seq) {
+            Some(p) => p,
+            None => continue,
+        };
+        let rat = new_patch.to_rat();
+        profile.apply_ns += t0.elapsed().as_nanos() as u64;
+
+        // Store free (canonical) form directly to cut search space in half
+        let free_form = std::cmp::min(rat.clone(), rat.reflected());
+
+        let new_size = current_size + 1;
+        if results.entry(new_size).or_default().insert(free_form) {
+            grow_recursive(
+                &new_patch,
+                seed_seq,
+                site.counter,
+                new_size,
+                max_size,
+                results,
+                profile,
+            );
+        }
+    }
+}
+
+#[allow(dead_code)]
+fn enumerate_onesided(max_size: usize) -> BTreeMap<usize, FxHashSet<Rat<tilezz::cyclotomic::ZZ4>>> {
+    let seed: Rat<tilezz::cyclotomic::ZZ4> = Rat::from_unchecked(&tiles::square());
+    let seed_seq = seed.seq().to_vec();
+    let initial = Patch::from_seed(&seed);
+    let mut results: BTreeMap<usize, FxHashSet<Rat<tilezz::cyclotomic::ZZ4>>> = BTreeMap::new();
+    results.entry(1).or_default().insert(initial.to_rat());
+    let mut profile = ProfileStats {
+        enumerate_ns: 0,
+        apply_ns: 0,
+        enumerate_calls: 0,
+    };
+    grow_recursive(
+        &initial,
+        &seed_seq,
+        0,
+        1,
+        max_size,
+        &mut results,
+        &mut profile,
+    );
+    results
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tilezz::intgeom::rat::Rat;
+    use tilezz::intgeom::tiles;
+
+    fn square_seed() -> (Rat<tilezz::cyclotomic::ZZ4>, Vec<i8>) {
+        let seed: Rat<tilezz::cyclotomic::ZZ4> = Rat::from_unchecked(&tiles::square());
+        let seq = seed.seq().to_vec();
+        (seed, seq)
+    }
+
+    #[test]
+    fn test_dir_step() {
+        assert_eq!(dir_step(0), (1, 0));
+        assert_eq!(dir_step(1), (0, 1));
+        assert_eq!(dir_step(2), (-1, 0));
+        assert_eq!(dir_step(3), (0, -1));
+        assert_eq!(dir_step(-1), (0, -1));
+        assert_eq!(dir_step(4), (1, 0));
+    }
+
+    #[test]
+    fn test_dir_between() {
+        assert_eq!(dir_between((0, 0), (1, 0)), 0);
+        assert_eq!(dir_between((0, 0), (0, 1)), 1);
+        assert_eq!(dir_between((0, 0), (-1, 0)), 2);
+        assert_eq!(dir_between((0, 0), (0, -1)), 3);
+    }
+
+    #[test]
+    fn test_trace_positions_square() {
+        let pts = trace_positions((0, 0), 0, &[1, 1, 1, 1]);
+        assert_eq!(pts.len(), 5);
+        assert_eq!(pts[0], (0, 0));
+        assert_eq!(pts[1], (0, 1));
+        assert_eq!(pts[2], (-1, 1));
+        assert_eq!(pts[3], (-1, 0));
+        assert_eq!(pts[4], (0, 0));
+    }
+
+    #[test]
+    fn test_trace_positions_straight() {
+        let pts = trace_positions((0, 0), 0, &[0, 0]);
+        assert_eq!(pts, vec![(0, 0), (1, 0), (2, 0)]);
+    }
+
+    #[test]
+    fn test_trace_positions_empty() {
+        let pts = trace_positions((5, 5), 2, &[]);
+        assert_eq!(pts, vec![(5, 5)]);
+    }
+
+    #[test]
+    fn test_from_seed_square() {
+        let (seed, _) = square_seed();
+        let patch = Patch::from_seed(&seed);
+        assert_eq!(patch.angles.len(), 4);
+        assert_eq!(patch.positions.len(), 4);
+        assert_eq!(patch.counters, vec![0, 1, 2, 3]);
+        assert_eq!(patch.next_counter, 4);
+        assert_eq!(patch.visited.len(), 4);
+        assert!(patch.visited.contains(&(0, 0)));
+        assert!(patch.visited.contains(&(0, 1)));
+        assert!(patch.visited.contains(&(-1, 1)));
+        assert!(patch.visited.contains(&(-1, 0)));
+    }
+
+    #[test]
+    fn test_compute_match_matches_general_code() {
+        let (seed, seed_seq) = square_seed();
+        let patch = Patch::from_seed(&seed);
+        let n = patch.angles.len();
+        let m = seed_seq.len();
+
+        for ia in 0..n {
+            for ib in 0..m {
+                let (ns, ml, ne) = patch.compute_match(ia, &seed_seq, ib);
+
+                let (general_ns, general_ml, general_ne) =
+                    seed.get_match((ia as i64, ib as i64), &seed);
+
+                assert_eq!(
+                    (ns, ml, ne),
+                    (general_ns as usize, general_ml, general_ne as usize),
+                    "ia={ia}, ib={ib}: polyomino ({ns},{ml},{ne}) != general ({general_ns},{general_ml},{general_ne})"
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn test_compute_match_revcomp_correct() {
+        let (seed, seed_seq) = square_seed();
+        let patch = Patch::from_seed(&seed);
+        let n = patch.angles.len();
+        let m = seed_seq.len();
+
+        for ia in 0..n {
+            for ib in 0..m {
+                let (ns, ml, ne) = patch.compute_match(ia, &seed_seq, ib);
+                if ml < 2 {
+                    continue;
+                }
+
+                for k in 0..ml {
+                    let patch_angle = patch.angles[(ns + k) % n];
+                    let seed_angle = seed_seq[(ne + m - k) % m];
+                    assert_eq!(
+                        patch_angle, -seed_angle,
+                        "match at ia={ia}, ib={ib}, ne={ne}, ml={ml}, k={k}: patch angle {patch_angle} != neg seed angle {}",
+                        -seed_angle
+                    );
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_enumerate_sites_square_finds_sites() {
+        let (seed, seed_seq) = square_seed();
+        let patch = Patch::from_seed(&seed);
+        let sites = patch.enumerate_sites(&seed_seq, 0);
+        assert!(!sites.is_empty(), "single square should have glue sites");
+    }
+
+    #[test]
+    fn test_try_apply_produces_valid_rat() {
+        let (seed, seed_seq) = square_seed();
+        let patch = Patch::from_seed(&seed);
+        let sites = patch.enumerate_sites(&seed_seq, 0);
+
+        let mut valid_count = 0;
+        for site in &sites {
+            if let Some(new_patch) = patch.try_apply(site, &seed_seq) {
+                let rat = new_patch.to_rat();
+                assert_eq!(rat.len(), 6, "2 glued squares should produce a 6-gon");
+                assert!(
+                    rat.seq().iter().all(|&a| a.abs() <= HTURN),
+                    "angles should be normalized: {:?}",
+                    rat.seq()
+                );
+                assert_eq!(new_patch.positions.len(), 6);
+                assert_eq!(new_patch.counters.len(), 6);
+                assert_eq!(new_patch.visited.len(), 6);
+                valid_count += 1;
+            }
+        }
+        assert!(
+            valid_count > 0,
+            "at least one site should apply successfully"
+        );
+    }
+
+    #[test]
+    fn test_size2_matches_general_glue() {
+        let seed: Rat<tilezz::cyclotomic::ZZ4> = Rat::from_unchecked(&tiles::square());
+        let general_results: FxHashSet<Rat<tilezz::cyclotomic::ZZ4>> = (0..seed.len())
+            .filter_map(|ia| seed.try_glue((ia as i64, 0), &seed).ok())
+            .collect();
+
+        let (_, seed_seq) = square_seed();
+        let patch = Patch::from_seed(&seed);
+        let sites = patch.enumerate_sites(&seed_seq, 0);
+        let incremental_results: FxHashSet<Rat<tilezz::cyclotomic::ZZ4>> = sites
+            .iter()
+            .filter_map(|site| patch.try_apply(site, &seed_seq))
+            .map(|p| p.to_rat())
+            .collect();
+
+        assert_eq!(
+            incremental_results, general_results,
+            "size-2 results differ: got {:?}, expected {:?}",
+            incremental_results, general_results
+        );
+    }
+
+    #[test]
+    fn test_free_matches_oeis() {
+        let max_size = 12;
+        let results = enumerate_onesided(max_size);
+        for k in 1..=max_size {
+            let count = results.get(&k).map(|s| s.len()).unwrap_or(0);
+            eprintln!("size {k}: {count}");
+        }
+
+        // Our implementation now stores free polyominoes directly
+        // (not one-sided like before)
+        // These should match OEIS A000105 (free polyominoes, one less than one-sided)
+        let expected = [
+            (1, 1),
+            (2, 1),
+            (3, 2),
+            (4, 5),
+            (5, 12),
+            (6, 35),
+            (7, 107),
+            (8, 363),
+            (9, 1248),
+            (10, 4460),
+            (11, 16094),
+            (12, 58937),
+        ];
+        for (size, expected_count) in expected.iter() {
+            let actual = results.get(size).map(|s| s.len()).unwrap_or(0);
+            assert_eq!(
+                actual, *expected_count,
+                "size {}: expected {}, got {}",
+                size, expected_count, actual
+            );
+        }
+    }
+
+    // Note: make_free is now unused since we store free form directly
+    // Keeping it for API compatibility and potential future use
+
+    #[test]
+    fn test_visited_grows_correctly() {
+        let (seed, seed_seq) = square_seed();
+        let patch = Patch::from_seed(&seed);
+        let sites = patch.enumerate_sites(&seed_seq, 0);
+
+        for site in &sites {
+            if let Some(new_patch) = patch.try_apply(site, &seed_seq) {
+                for &p in &patch.visited {
+                    assert!(
+                        new_patch.visited.contains(&p),
+                        "visited should be superset: missing {p:?}"
+                    );
+                }
+                assert!(
+                    new_patch.visited.len() > patch.visited.len(),
+                    "visited should grow after a successful apply"
+                );
+                return;
+            }
+        }
+        panic!("no valid apply found");
+    }
+
+    #[test]
+    fn test_positions_form_cycle() {
+        let (seed, seed_seq) = square_seed();
+        let patch = Patch::from_seed(&seed);
+        let sites = patch.enumerate_sites(&seed_seq, 0);
+
+        for site in &sites {
+            if let Some(new_patch) = patch.try_apply(site, &seed_seq) {
+                let pts = trace_positions(
+                    new_patch.positions[0],
+                    if new_patch.positions.len() > 1 {
+                        dir_between(new_patch.positions[0], new_patch.positions[1])
+                    } else {
+                        0
+                    },
+                    &new_patch.angles,
+                );
+                let n = new_patch.angles.len();
+                assert_eq!(
+                    pts.len(),
+                    n + 1,
+                    "tracing {} angles should give {} points",
+                    n,
+                    n + 1
+                );
+                assert_eq!(
+                    *pts.first().unwrap(),
+                    *pts.last().unwrap(),
+                    "positions should form a closed cycle"
+                );
+                for (i, (pos, pt)) in new_patch
+                    .positions
+                    .iter()
+                    .zip(pts.iter())
+                    .enumerate()
+                    .take(n)
+                {
+                    assert_eq!(*pos, *pt, "position {i} mismatch with traced points");
+                }
+            }
+        }
+    }
+}
diff --git a/src/bin/rat_enum.rs b/src/bin/rat_enum.rs
index 93ffcc6..94fcf77 100644
--- a/src/bin/rat_enum.rs
+++ b/src/bin/rat_enum.rs
@@ -1,4 +1,4 @@
-use std::collections::BTreeSet;
+use std::collections::HashSet;
 use std::sync::Mutex;
 use std::time::Instant;
 
@@ -21,7 +21,7 @@ static VERBOSE: Mutex<bool> = Mutex::new(false);
 // this seems to be slower... maybe should optimize by using a trie of snakes
 // if one shorter snake does not work, no point using one with same prefix!
 pub fn rat_enum_alt<ZZ: ZZType + Units>(max_steps: usize) -> Vec<Vec<i8>> {
-    let mut result: BTreeSet<Vec<i8>> = BTreeSet::new();
+    let mut result: HashSet<Vec<i8>> = HashSet::new();
 
     // simple snakes of a fixed length (initialized with length 0 (dummy) and 1 (start))
     let mut k_snakes: Vec<Vec<Vec<i8>>> = vec![vec![vec![]]];
@@ -29,7 +29,7 @@ pub fn rat_enum_alt<ZZ: ZZType + Units>(max_steps: usize) -> Vec<Vec<i8>> {
     let mut total_snakes: usize = (ZZ::turn() - 1) as usize;
 
     println!("-------- enumeration started --------");
-    let status_log = |step, num_snakes: usize, result: &BTreeSet<_>| {
+    let status_log = |step, num_snakes: usize, result: &HashSet<_>| {
         println!(
             "round {}: {} snakes, {} rats",
             step,
@@ -69,8 +69,10 @@ pub fn rat_enum_alt<ZZ: ZZType + Units>(max_steps: usize) -> Vec<Vec<i8>> {
                     }
                     .canonical()
                 };
+
                 let seq = r.seq().to_vec();
-                if result.insert(seq.clone()) {
+                let is_new = result.insert(seq.clone());
+                if is_new {
                     total_snakes += 1;
                     println!("RAT {seq:?}");
                 }
@@ -85,7 +87,10 @@ pub fn rat_enum_alt<ZZ: ZZType + Units>(max_steps: usize) -> Vec<Vec<i8>> {
 
             // add the prolonged path
             next.push(s.angles().to_vec());
-            println!("SNAKE {:?}", s.angles());
+
+            if *VERBOSE.lock().unwrap() {
+                println!("SNAKE {:?}", s.angles());
+            }
         }
 
         k_snakes.push(next);
@@ -99,12 +104,12 @@ pub fn rat_enum_alt<ZZ: ZZType + Units>(max_steps: usize) -> Vec<Vec<i8>> {
 }
 
 pub fn rat_enum<ZZ: ZZType + Units>(max_steps: usize) -> Vec<Vec<i8>> {
-    let mut result: BTreeSet<Vec<i8>> = BTreeSet::new();
+    let mut result: HashSet<Vec<i8>> = HashSet::new();
 
     let mut snakes: Vec<Vec<i8>> = vec![vec![]];
 
     println!("-------- enumeration started --------");
-    let status_log = |step, snakes: &Vec<_>, result: &BTreeSet<_>| {
+    let status_log = |step, snakes: &Vec<Vec<i8>>, result: &HashSet<_>| {
         println!(
             "round {}: {} alive snakes, {} unique rats",
             step,
@@ -120,9 +125,13 @@ pub fn rat_enum<ZZ: ZZType + Units>(max_steps: usize) -> Vec<Vec<i8>> {
         let remaining_steps_sq = norm_sq(&ZZ::from(remaining));
 
         let mut next: Vec<Vec<i8>> = Vec::new();
+
         for seq in snakes.iter() {
+            let parent: Snake<ZZ> = Snake::from_slice_unsafe(seq);
+
             for direction in ((-ZZ::hturn() + 1)..ZZ::hturn()).rev() {
-                let mut s: Snake<ZZ> = Snake::try_from(seq.as_slice()).unwrap();
+                // O(1): clone parent (full Snake state) and add one step.
+                let mut s = parent.clone();
                 if !s.add(direction) {
                     continue; // invalid step (self-crossing)
                 }
@@ -137,8 +146,10 @@ pub fn rat_enum<ZZ: ZZType + Units>(max_steps: usize) -> Vec<Vec<i8>> {
                         }
                         .canonical()
                     };
+
                     let seq = r.seq().to_vec();
-                    if result.insert(seq.clone()) {
+                    let is_new = result.insert(seq.clone());
+                    if is_new {
                         println!("RAT {seq:?}");
                     }
                 }
diff --git a/src/cyclotomic/div.rs b/src/cyclotomic/div.rs
index b9baee9..51433d6 100644
--- a/src/cyclotomic/div.rs
+++ b/src/cyclotomic/div.rs
@@ -71,6 +71,10 @@ pub fn zz_inv<Z: SymNum + Conj + One + Zero>(val: &Z) -> Z {
         numer = numer * denom_conj;
         // update denominator (= (a + b)(a - b) = a^2 - b^2)
         denom = denom * denom_conj;
+        debug_assert!(
+            !denom.zz_coeffs().iter().all(|c| c.is_zero()),
+            "zz_inv: denominator collapsed to zero; likely i64 overflow in rationalization"
+        );
     }
 
     // now we have a rational denominator (i.e. no square root terms)
diff --git a/src/cyclotomic/gaussint.rs b/src/cyclotomic/gaussint.rs
index 6b0e260..be2619a 100644
--- a/src/cyclotomic/gaussint.rs
+++ b/src/cyclotomic/gaussint.rs
@@ -149,11 +149,20 @@ impl<
     type Output = Self;
 
     fn pow(self, other: u8) -> Self {
-        let mut x = Self::one();
-        for _ in 0..other {
-            x = x * self;
+        // Binary (exponentiation by squaring): O(log n) multiplications.
+        let mut base = self;
+        let mut exp = other;
+        let mut acc = Self::one();
+        while exp != 0 {
+            if (exp & 1) == 1 {
+                acc = acc * base;
+            }
+            exp >>= 1;
+            if exp != 0 {
+                base = base * base;
+            }
         }
-        x
+        acc
     }
 }
 impl<
diff --git a/src/cyclotomic/mod.rs b/src/cyclotomic/mod.rs
index 2f657cf..42d2f78 100644
--- a/src/cyclotomic/mod.rs
+++ b/src/cyclotomic/mod.rs
@@ -2,7 +2,7 @@
 
 mod numtraits;
 
-mod gaussint;
+pub mod gaussint;
 
 #[macro_use]
 mod symnum;
@@ -42,7 +42,7 @@ pub use units::Units;
 
 pub use traits::{ComplexTraits, FieldTraits, RealTraits, RingTraits};
 pub use traits::{HasZZ10, HasZZ12, HasZZ4, HasZZ6, HasZZ8};
-pub use traits::{IsComplex, IsField, IsReal, IsRealOrComplex, IsRing, IsRingOrField};
+pub use traits::{IsComplex, IsField, IsReal, IsRealOrComplex, IsRing, IsRingOrField, IsZZ4};
 pub use traits::{QQType, QType, ZType, ZZType};
 
 pub use types::{Z10, Z12, Z16, Z20, Z24, Z30, Z32, Z4, Z6, Z60, Z8};
diff --git a/src/cyclotomic/sign.rs b/src/cyclotomic/sign.rs
index 33d82d5..c9f42f1 100644
--- a/src/cyclotomic/sign.rs
+++ b/src/cyclotomic/sign.rs
@@ -102,6 +102,46 @@ pub fn signum_sum_sqrt_expr_4<T: IntRing + ZSigned + FromPrimitive>(
     sgn_bc_terms.signum() * (sq_lhs - sq_rhs).signum()
 }
 
+/// Floating-point-free sign of a + b*sqrt(5) + c*sqrt(10-2*sqrt(5)) + d*sqrt(5*(10-2*sqrt(5)))
+/// via recursive reduction from Q(sqrt(5), sqrt(10-2*sqrt(5))) to Q(sqrt(5)).
+///
+/// Group as z = P + Q*sqrt(y) where P = a + b*sqrt(5), Q = c + d*sqrt(5), y = 10 - 2*sqrt(5).
+/// Then sign(z) is determined by sign(P), sign(Q), and sign(P^2 - Q^2*y), with P^2 - Q^2*y in Q(sqrt(5)).
+pub fn signum_sum_sqrt_expr_4_pentagonal<T: IntRing + ZSigned + FromPrimitive>(
+    a: T,
+    b: T,
+    c: T,
+    d: T,
+) -> T {
+    let sp = signum_sum_sqrt_expr_2(a, T::one(), b, T::from_i8(5).unwrap());
+    let sq = signum_sum_sqrt_expr_2(c, T::one(), d, T::from_i8(5).unwrap());
+
+    if sp == sq {
+        return sp;
+    }
+    if sq.is_zero() {
+        return sp;
+    }
+
+    let int2 = T::from_i8(2).unwrap();
+    let int5 = T::from_i8(5).unwrap();
+    let int10 = T::from_i8(10).unwrap();
+    let int20 = T::from_i8(20).unwrap();
+    let int50 = T::from_i8(50).unwrap();
+
+    let aa = a * a;
+    let bb = b * b;
+    let cc = c * c;
+    let dd = d * d;
+
+    let alpha = aa + int5 * bb - int10 * cc + int20 * c * d - int50 * dd;
+    let beta = int2 * a * b + int2 * cc - int20 * c * d + int10 * dd;
+
+    let spq = signum_sum_sqrt_expr_2(alpha, T::one(), beta, int5);
+
+    -sq * spq
+}
+
 // --------
 
 /// This implementation uses f64 (works in all ZZ rings).
@@ -165,13 +205,28 @@ pub fn zz_partial_signum_4_sym<Z: IsReal>(val: &Z) -> Z {
     result
 }
 
+pub fn zz_partial_signum_4_pentagonal<Z: IsReal>(val: &Z) -> Z {
+    let cs = val.zz_coeffs();
+    debug_assert!(cs.len() == 4);
+
+    let rs = Z::zz_params().sym_roots_sqs;
+    debug_assert!(rs.len() == 4);
+
+    let sgn = signum_sum_sqrt_expr_4_pentagonal(cs[0], cs[1], cs[2], cs[3]);
+
+    let mut result = Z::zero();
+    result.zz_coeffs_mut()[0] = sgn;
+    result
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
 
     use super::super::constants::*;
     use super::super::symnum::ZZComplex;
-    use super::super::types::{Z24, ZZ24};
+    use super::super::types::{Z10, Z24, ZZ10, ZZ24};
+    use super::super::units::Units;
     use num_traits::One;
 
     #[test]
@@ -251,4 +306,55 @@ mod tests {
         assert_eq!(sign_zz24(a, -b, c, -d), m);
         assert_eq!(sign_zz24(a, b, -c, -d), p);
     }
+
+    #[test]
+    fn test_sum_root_expr_sign_4_pentagonal() {
+        let sign_zz10 = |a, b, c, d| signum_sum_sqrt_expr_4_pentagonal(a, b, c, d);
+
+        assert_eq!(sign_zz10(0, 0, 0, 0), 0);
+        assert_eq!(sign_zz10(1, 0, 0, 0), 1);
+        assert_eq!(sign_zz10(-1, 0, 0, 0), -1);
+        assert_eq!(sign_zz10(0, 1, 0, 0), 1);
+        assert_eq!(sign_zz10(0, -1, 0, 0), -1);
+        assert_eq!(sign_zz10(0, 0, 1, 0), 1);
+        assert_eq!(sign_zz10(0, 0, -1, 0), -1);
+        assert_eq!(sign_zz10(0, 0, 0, 1), 1);
+        assert_eq!(sign_zz10(0, 0, 0, -1), -1);
+
+        assert_eq!(sign_zz10(1, 1, 0, 0), 1);
+        assert_eq!(sign_zz10(-1, -1, 0, 0), -1);
+        assert_eq!(sign_zz10(0, 0, 1, 1), 1);
+        assert_eq!(sign_zz10(0, 0, -1, -1), -1);
+
+        assert_eq!(sign_zz10(-3, 0, 1, 0), -1);
+        assert_eq!(sign_zz10(3, 0, -1, 0), 1);
+        assert_eq!(sign_zz10(1, 0, 0, -2), -1);
+        assert_eq!(sign_zz10(-1, 0, 0, 2), 1);
+    }
+
+    #[test]
+    fn test_signum_zz10() {
+        let sq5: ZZ10 = sqrt5();
+        let sqy: ZZ10 = <ZZ10 as Units>::unit(1).im().into();
+        let sq5y: ZZ10 = sq5 * sqy;
+
+        let z = Z10::zero();
+        let p = Z10::one();
+        let m = -p;
+
+        let sign_zz10 = |a, b, c, d| {
+            (ZZ10::from(a) + ZZ10::from(b) * sq5 + ZZ10::from(c) * sqy + ZZ10::from(d) * sq5y)
+                .re()
+                .signum()
+        };
+
+        let (a, b, c, d) = (485, 343, 280, 198);
+        assert_eq!(sign_zz10(0, 0, 0, 0), z);
+        assert_eq!(sign_zz10(-a, -b, c, d), m);
+        assert_eq!(sign_zz10(-a, b, -c, d), p);
+        assert_eq!(sign_zz10(-a, b, c, -d), p);
+        assert_eq!(sign_zz10(a, -b, -c, d), m);
+        assert_eq!(sign_zz10(a, -b, c, -d), m);
+        assert_eq!(sign_zz10(a, b, -c, -d), p);
+    }
 }
diff --git a/src/cyclotomic/sign_extra_tests.rs b/src/cyclotomic/sign_extra_tests.rs
index 3e00f08..27e27d1 100644
--- a/src/cyclotomic/sign_extra_tests.rs
+++ b/src/cyclotomic/sign_extra_tests.rs
@@ -3,7 +3,9 @@
 
 #[cfg(test)]
 mod extra_tests {
-    use crate::cyclotomic::sign::{signum_sum_sqrt_expr_2, signum_sum_sqrt_expr_4};
+    use crate::cyclotomic::sign::{
+        signum_sum_sqrt_expr_2, signum_sum_sqrt_expr_4, signum_sum_sqrt_expr_4_pentagonal,
+    };
 
     // Slow reference using f64, only for tests.
     fn sign_f64(x: f64) -> i64 {
@@ -49,4 +51,25 @@ mod extra_tests {
             }
         }
     }
+
+    #[test]
+    fn test_signum_sum_sqrt_expr_4_pentagonal_matches_f64_on_small_range() {
+        let sqrt5: f64 = 2.236_067_977_499_79;
+        let y: f64 = 10.0 - 2.0 * sqrt5;
+        for a in -6..=6 {
+            for b in -6..=6 {
+                for c in -6..=6 {
+                    for d in -6..=6 {
+                        let got = signum_sum_sqrt_expr_4_pentagonal(a, b, c, d);
+                        let val = (a as f64)
+                            + (b as f64) * sqrt5
+                            + (c as f64) * y.sqrt()
+                            + (d as f64) * (5.0 * y).sqrt();
+                        let exp = sign_f64(val);
+                        assert_eq!(got, exp, "a={a} b={b} c={c} d={d}");
+                    }
+                }
+            }
+        }
+    }
 }
diff --git a/src/cyclotomic/symnum.rs b/src/cyclotomic/symnum.rs
index 19a9ea9..dce45ff 100644
--- a/src/cyclotomic/symnum.rs
+++ b/src/cyclotomic/symnum.rs
@@ -115,10 +115,14 @@ pub trait SymNum: Clone + Copy + PartialEq + Eq + Hash + Debug + Display {
     #[inline]
     fn scale<I: Integer + ToPrimitive>(&self, scalar: I) -> Self
     where
-        Self: Sized,
+        Self: Sized + Copy,
     {
-        let cs: Vec<Self::Scalar> = Self::zz_mul_scalar(self.zz_coeffs(), scalar.to_i64().unwrap());
-        Self::new(&cs)
+        let sc = Self::Scalar::from_i64(scalar.to_i64().unwrap()).unwrap();
+        let mut ret = *self;
+        for c in ret.zz_coeffs_mut().iter_mut() {
+            *c = *c * sc;
+        }
+        ret
     }
 
     // --------
@@ -156,26 +160,35 @@ pub trait SymNum: Clone + Copy + PartialEq + Eq + Hash + Debug + Display {
         ret
     }
 
-    fn zz_add(&self, other: &Self) -> Vec<Self::Scalar> {
-        let mut ret = Self::zz_zero_vec();
-        for (i, (aval, bval)) in self.zz_coeffs().iter().zip(other.zz_coeffs()).enumerate() {
-            ret[i] = *aval + *bval;
+    fn zz_add(&self, other: &Self) -> Self
+    where
+        Self: Sized + Copy,
+    {
+        let mut ret = *self;
+        for (r, b) in ret.zz_coeffs_mut().iter_mut().zip(other.zz_coeffs()) {
+            *r = *r + *b;
         }
         ret
     }
 
-    fn zz_sub(&self, other: &Self) -> Vec<Self::Scalar> {
-        let mut ret = Self::zz_zero_vec();
-        for (i, (aval, bval)) in self.zz_coeffs().iter().zip(other.zz_coeffs()).enumerate() {
-            ret[i] = *aval - *bval;
+    fn zz_sub(&self, other: &Self) -> Self
+    where
+        Self: Sized + Copy,
+    {
+        let mut ret = *self;
+        for (r, b) in ret.zz_coeffs_mut().iter_mut().zip(other.zz_coeffs()) {
+            *r = *r - *b;
         }
         ret
     }
 
-    fn zz_neg(&self) -> Vec<Self::Scalar> {
-        let mut ret = Self::zz_zero_vec();
-        for (i, val) in self.zz_coeffs().iter().enumerate() {
-            ret[i] = -(*val);
+    fn zz_neg(&self) -> Self
+    where
+        Self: Sized + Copy,
+    {
+        let mut ret = *self;
+        for r in ret.zz_coeffs_mut().iter_mut() {
+            *r = -*r;
         }
         ret
     }
@@ -353,7 +366,11 @@ macro_rules! impl_symnum {
             #[inline]
             fn zz_mul_scalar(x: &[Self::Scalar], scalar: i64) -> Vec<Self::Scalar> {
                 let sc: Self::Scalar = Self::Scalar::from(scalar);
-                x.into_iter().map(|c| *c * sc).collect()
+                let mut ret = [Self::Scalar::zero(); $params.sym_roots_num];
+                for (r, c) in ret.iter_mut().zip(x.iter()) {
+                    *r = *c * sc;
+                }
+                ret.to_vec()
             }
 
             fn new(coeffs: &[Self::Scalar]) -> Self {
@@ -365,15 +382,13 @@ macro_rules! impl_symnum {
             }
 
             fn complex64(&self) -> Complex64 {
-                // Avoid heap allocations: compute sum directly.
+                static SYM_ROOTS: std::sync::OnceLock<Vec<f64>> = std::sync::OnceLock::new();
+                let roots = SYM_ROOTS
+                    .get_or_init(|| $params.sym_roots_sqs.iter().map(|sq| sq.sqrt()).collect());
                 let mut ret = Complex64::zero();
-                for (c, root_sq) in self
-                    .zz_coeffs()
-                    .iter()
-                    .zip(Self::zz_params().sym_roots_sqs.iter())
-                {
+                for (c, root) in self.zz_coeffs().iter().zip(roots.iter()) {
                     let re = c.to_f64().unwrap();
-                    ret += Complex64::new(re * root_sq.sqrt(), 0.0);
+                    ret += Complex64::new(re * root, 0.0);
                 }
                 ret
             }
@@ -405,7 +420,11 @@ macro_rules! impl_symnum {
             #[inline]
             fn zz_mul_scalar(x: &[Self::Scalar], scalar: i64) -> Vec<Self::Scalar> {
                 let sc: Self::Scalar = Self::Scalar::from(scalar);
-                x.into_iter().map(|c| *c * sc).collect()
+                let mut ret = [Self::Scalar::zero(); $params.sym_roots_num];
+                for (r, c) in ret.iter_mut().zip(x.iter()) {
+                    *r = *c * sc;
+                }
+                ret.to_vec()
             }
 
             fn new(coeffs: &[Self::Scalar]) -> Self {
@@ -417,17 +436,14 @@ macro_rules! impl_symnum {
             }
 
             fn complex64(&self) -> Complex64 {
-                // Avoid heap allocations: compute sum directly.
+                static SYM_ROOTS: std::sync::OnceLock<Vec<f64>> = std::sync::OnceLock::new();
+                let roots = SYM_ROOTS
+                    .get_or_init(|| $params.sym_roots_sqs.iter().map(|sq| sq.sqrt()).collect());
                 let mut ret = Complex64::zero();
-                for (c, root_sq) in self
-                    .zz_coeffs()
-                    .iter()
-                    .zip(Self::zz_params().sym_roots_sqs.iter())
-                {
+                for (c, root) in self.zz_coeffs().iter().zip(roots.iter()) {
                     let re = c.real.to_f64().unwrap();
                     let im = c.imag.to_f64().unwrap();
-                    let u = root_sq.sqrt();
-                    ret += Complex64::new(re * u, im * u);
+                    ret += Complex64::new(re * root, im * root);
                 }
                 ret
             }
@@ -536,13 +552,19 @@ macro_rules! impl_complex_traits {
             }
 
             fn re(&self) -> <Self as IsRingOrField>::Real {
-                let cs: Vec<RatioT> = self.zz_coeffs().iter().map(|c| c.real).collect();
-                <Self as IsRingOrField>::Real::new(cs.as_slice())
+                let mut ret = <Self as IsRingOrField>::Real::zero();
+                for (r, c) in ret.zz_coeffs_mut().iter_mut().zip(self.zz_coeffs()) {
+                    *r = c.real;
+                }
+                ret
             }
 
             fn im(&self) -> <Self as IsRingOrField>::Real {
-                let cs: Vec<RatioT> = self.zz_coeffs().iter().map(|c| c.imag).collect();
-                <Self as IsRingOrField>::Real::new(cs.as_slice())
+                let mut ret = <Self as IsRingOrField>::Real::zero();
+                for (r, c) in ret.zz_coeffs_mut().iter_mut().zip(self.zz_coeffs()) {
+                    *r = c.imag;
+                }
+                ret
             }
         }
 
@@ -561,8 +583,11 @@ macro_rules! impl_conj {
         }
         impl Conj for $name {
             fn conj(&self) -> Self {
-                let cs: Vec<GIntT> = self.zz_coeffs().iter().map(|c| c.conj()).collect();
-                Self::new(&cs)
+                let mut ret = *self;
+                for c in ret.zz_coeffs_mut().iter_mut() {
+                    *c = c.conj();
+                }
+                ret
             }
         }
     };
@@ -574,19 +599,19 @@ macro_rules! impl_intring_traits {
         impl Neg for $t {
             type Output = Self;
             fn neg(self) -> Self {
-                Self::new(&Self::zz_neg(&self))
+                self.zz_neg()
             }
         }
         impl Add<$t> for $t {
             type Output = Self;
             fn add(self, other: Self) -> Self {
-                Self::new(&Self::zz_add(&self, &other))
+                self.zz_add(&other)
             }
         }
         impl Sub<$t> for $t {
             type Output = Self;
             fn sub(self, other: Self) -> Self {
-                Self::new(&Self::zz_sub(&self, &other))
+                self.zz_sub(&other)
             }
         }
         impl Mul<$t> for $t {
@@ -614,7 +639,7 @@ macro_rules! impl_intring_traits {
                 Self::new(&Self::zz_zero_vec())
             }
             fn is_zero(&self) -> bool {
-                self.zz_coeffs().to_vec() == Self::zz_zero_vec()
+                self.zz_coeffs().iter().all(|c| c.is_zero())
             }
         }
         impl One for $t {
@@ -622,7 +647,8 @@ macro_rules! impl_intring_traits {
                 Self::new(&Self::zz_one_vec())
             }
             fn is_one(&self) -> bool {
-                self.zz_coeffs().to_vec() == Self::zz_one_vec()
+                let cs = self.zz_coeffs();
+                cs[0].is_one() && cs[1..].iter().all(|c| c.is_zero())
             }
         }
 
@@ -784,25 +810,31 @@ macro_rules! impl_conversions {
         impl From<$z> for $zz {
             /// Lift real-valued ring value into the corresponding cyclomatic ring
             fn from(value: $z) -> Self {
-                let cs: Vec<<$zz as SymNum>::Scalar> =
-                    value.zz_coeffs().iter().map(|z| (*z).into()).collect();
-                Self::new(cs.as_slice())
+                let mut ret = Self::zero();
+                for (r, c) in ret.zz_coeffs_mut().iter_mut().zip(value.zz_coeffs()) {
+                    *r = (*c).into();
+                }
+                ret
             }
         }
         impl From<$z> for $qq {
             /// Lift real-valued ring value into the corresponding cyclomatic ring
             fn from(value: $z) -> Self {
-                let cs: Vec<<$zz as SymNum>::Scalar> =
-                    value.zz_coeffs().iter().map(|z| (*z).into()).collect();
-                Self::new(cs.as_slice())
+                let mut ret = Self::zero();
+                for (r, c) in ret.zz_coeffs_mut().iter_mut().zip(value.zz_coeffs()) {
+                    *r = (*c).into();
+                }
+                ret
             }
         }
         impl From<$q> for $qq {
             /// Lift real-valued ring value into the corresponding cyclomatic ring
             fn from(value: $q) -> Self {
-                let cs: Vec<<$qq as SymNum>::Scalar> =
-                    value.zz_coeffs().iter().map(|z| (*z).into()).collect();
-                Self::new(cs.as_slice())
+                let mut ret = Self::zero();
+                for (r, c) in ret.zz_coeffs_mut().iter_mut().zip(value.zz_coeffs()) {
+                    *r = (*c).into();
+                }
+                ret
             }
         }
 
diff --git a/src/cyclotomic/traits.rs b/src/cyclotomic/traits.rs
index 9b9bc85..ed2aca0 100644
--- a/src/cyclotomic/traits.rs
+++ b/src/cyclotomic/traits.rs
@@ -87,11 +87,18 @@ pub trait QQType: IsField<Complex = Self> + IsComplex<Field = Self> {
     type Ring: ZZType<Field = Self>;
 }
 
+/// Ring is exactly ZZ4 (all edges are axis-aligned unit segments).
+/// Unlike HasZZ4 (which includes ZZ8, ZZ12, etc.), only ZZ4 itself implements this.
+pub trait IsZZ4Impl {}
+
 /// rings containing ZZ4
 pub trait HasZZ4Impl {}
 pub trait HasZZ4: IsComplex + HasZZ4Impl + From<(IntT, IntT)> {}
 impl<T: IsComplex + HasZZ4Impl + From<(IntT, IntT)>> HasZZ4 for T {}
 
+pub trait IsZZ4: IsComplex + IsZZ4Impl {}
+impl<T: IsComplex + IsZZ4Impl> IsZZ4 for T {}
+
 /// rings containing ZZ6
 pub trait HasZZ6Impl {}
 pub trait HasZZ6: IsComplex + HasZZ6Impl {}
diff --git a/src/cyclotomic/types.rs b/src/cyclotomic/types.rs
index b2eb3ff..e73b3f0 100644
--- a/src/cyclotomic/types.rs
+++ b/src/cyclotomic/types.rs
@@ -15,14 +15,14 @@ use super::gaussint::GaussInt;
 use super::numtraits::{Ccw, Conj, InnerIntType, IntField, IntRing, OneImag, ZSigned};
 use super::params::*;
 use super::sign::{
-    zz_partial_signum_1_sym, zz_partial_signum_2_sym, zz_partial_signum_4_sym,
-    zz_partial_signum_fallback,
+    zz_partial_signum_1_sym, zz_partial_signum_2_sym, zz_partial_signum_4_pentagonal,
+    zz_partial_signum_4_sym, zz_partial_signum_fallback,
 };
 use super::symnum::{GIntT, RatioT, SymNum, ZZComplex, ZZParams};
 use super::traits::{
     ComplexTraits, FieldTraits, HasZZ10Impl, HasZZ12Impl, HasZZ4Impl, HasZZ6Impl, HasZZ8Impl,
-    IsComplex, IsField, IsReal, IsRealOrComplex, IsRing, IsRingOrField, QQType, QType, RealTraits,
-    RingTraits, ZType, ZZType,
+    IsComplex, IsField, IsReal, IsRealOrComplex, IsRing, IsRingOrField, IsZZ4Impl, QQType, QType,
+    RealTraits, RingTraits, ZType, ZZType,
 };
 use super::units::Units;
 
@@ -113,10 +113,10 @@ super::units::impl_units_for!(QQ60);
 impl_functional_traits!(ZZ4, Z4, QQ4, Q4, zz_partial_signum_1_sym);
 impl_functional_traits!(ZZ6, Z6, QQ6, Q6, zz_partial_signum_2_sym);
 impl_functional_traits!(ZZ8, Z8, QQ8, Q8, zz_partial_signum_2_sym);
-impl_functional_traits!(ZZ10, Z10, QQ10, Q10, zz_partial_signum_fallback);
+impl_functional_traits!(ZZ10, Z10, QQ10, Q10, zz_partial_signum_4_pentagonal);
 impl_functional_traits!(ZZ12, Z12, QQ12, Q12, zz_partial_signum_2_sym);
-impl_functional_traits!(ZZ16, Z16, QQ16, Q16, zz_partial_signum_fallback);
-impl_functional_traits!(ZZ20, Z20, QQ20, Q20, zz_partial_signum_fallback);
+impl_functional_traits!(ZZ16, Z16, QQ16, Q16, zz_partial_signum_4_sym);
+impl_functional_traits!(ZZ20, Z20, QQ20, Q20, zz_partial_signum_4_pentagonal);
 impl_functional_traits!(ZZ24, Z24, QQ24, Q24, zz_partial_signum_4_sym);
 impl_functional_traits!(ZZ30, Z30, QQ30, Q30, zz_partial_signum_fallback);
 impl_functional_traits!(ZZ32, Z32, QQ32, Q32, zz_partial_signum_fallback);
@@ -124,6 +124,7 @@ impl_functional_traits!(ZZ60, Z60, QQ60, Q60, zz_partial_signum_fallback);
 impl_conversions_for!(4 6 8 10 12 16 20 24 30 32 60);
 
 zz_triv_impl!(HasZZ4Impl, ZZ4 ZZ8 ZZ12 ZZ16 ZZ20 ZZ24 ZZ32 ZZ60);
+zz_triv_impl!(IsZZ4Impl, ZZ4);
 impl_from_int_pair!(4 8 12 16 20 24 32 60);
 impl_from_real_pair!(QQ4 QQ8 QQ12 QQ16 QQ20 QQ24 QQ32 QQ60);
 
diff --git a/src/intgeom/grid.rs b/src/intgeom/grid.rs
index 78ac530..f4fb1d1 100644
--- a/src/intgeom/grid.rs
+++ b/src/intgeom/grid.rs
@@ -27,13 +27,9 @@ impl UnitSquareGrid {
 
     /// Given a complex integer point, return the 5 unit square lattice cells
     /// that make up the neighborhood of the cell of the point.
-    pub fn neighborhood_of<T: SymNum>(zz: T) -> Vec<(i64, i64)> {
-        let c @ (x, y) = Self::cell_of(zz);
-        let r = (x + 1, y);
-        let l = (x - 1, y);
-        let u = (x, y + 1);
-        let d = (x, y - 1);
-        vec![l, d, c, u, r] // sorted by first x, then y
+    pub fn neighborhood_of<T: SymNum>(zz: T) -> [(i64, i64); 5] {
+        let (x, y) = Self::cell_of(zz);
+        [(x - 1, y), (x, y - 1), (x, y), (x, y + 1), (x + 1, y)]
     }
 
     /// Given endpoints of a unit length line segment,
@@ -41,9 +37,10 @@ impl UnitSquareGrid {
     /// 1. the line segment is guaranteed to be fully contained inside, and
     /// 2. all intersecting unit length segments also have at least one point in it.
     pub fn seg_neighborhood_of<T: SymNum>(p1: T, p2: T) -> Vec<(i64, i64)> {
-        let mut result = Self::neighborhood_of(p1);
-        result.extend(Self::neighborhood_of(p2));
-        result.sort();
+        let mut result = Vec::with_capacity(8);
+        result.extend_from_slice(&Self::neighborhood_of(p1));
+        result.extend_from_slice(&Self::neighborhood_of(p2));
+        result.sort_unstable();
         result.dedup();
         result
     }
@@ -69,24 +66,39 @@ impl UnitSquareGrid {
     }
 
     /// Get all values attached to the given grid cell.
-    pub fn get(&self, (x, y): (i64, i64)) -> Vec<usize> {
-        match self.cols_rows.get(&x) {
-            None => vec![],
-            Some(col) => match col.get(&y) {
-                None => vec![],
-                Some(vals) => vals.clone(),
-            },
-        }
+    ///
+    /// Returns a reference to the indices stored in this cell, or an empty slice.
+    pub fn get(&self, (x, y): (i64, i64)) -> &[usize] {
+        self.cols_rows
+            .get(&x)
+            .and_then(|col| col.get(&y))
+            .map(Vec::as_slice)
+            .unwrap_or(&[])
     }
 
     /// Returns indices of all points in the given unit square lattice
     /// that are located inside one of the given cells.
     ///
-    /// Note that this does neither sort not deduplicate points.
+    /// Note that this does neither sort nor deduplicate points.
     pub fn get_cells(&self, cells: &[(i64, i64)]) -> Vec<usize> {
-        let mut pt_indices: Vec<usize> = Vec::new();
-        for cell in cells {
-            pt_indices.extend(self.get(*cell));
+        let total_entries: usize = cells
+            .iter()
+            .map(|&(x, y)| {
+                self.cols_rows
+                    .get(&x)
+                    .and_then(|col| col.get(&y))
+                    .map(Vec::len)
+                    .unwrap_or(0)
+            })
+            .sum();
+
+        let mut pt_indices = Vec::with_capacity(total_entries);
+        for &(x, y) in cells {
+            if let Some(col) = self.cols_rows.get(&x) {
+                if let Some(vals) = col.get(&y) {
+                    pt_indices.extend(vals);
+                }
+            }
         }
         pt_indices
     }
@@ -98,9 +110,10 @@ impl UnitSquareGrid {
         x_range: (Bound<i64>, Bound<i64>),
         y_range: (Bound<i64>, Bound<i64>),
     ) -> Vec<usize> {
-        let mut pt_indices: Vec<usize> = Vec::new();
+        let mut pt_indices = Vec::new();
         for (_, col) in self.cols_rows.range(x_range) {
             for (_, cell) in col.range(y_range) {
+                pt_indices.reserve(cell.len());
                 pt_indices.extend(cell);
             }
         }
@@ -198,6 +211,9 @@ mod tests {
         grid.add((1, 0), 0);
 
         // sanity-check
+        assert_eq!(grid.get((0, 0)), &[0, 1]);
+        assert_eq!(grid.get((1, 0)), &[2, 0]);
+        assert_eq!(grid.get((2, 0)), &[]);
         assert_eq!(grid.get_cells(&[(0, 0)]), &[0, 1]);
         assert_eq!(grid.get_cells(&[(1, 0)]), &[2, 0]);
         assert_eq!(UnitSquareGrid::get_cells(&grid, &[(2, 0)]), &[]);
diff --git a/src/intgeom/growing.rs b/src/intgeom/growing.rs
new file mode 100644
index 0000000..7c97ad1
--- /dev/null
+++ b/src/intgeom/growing.rs
@@ -0,0 +1,1090 @@
+use std::collections::BTreeMap;
+use std::marker::PhantomData;
+use std::sync::OnceLock;
+
+use rustc_hash::FxHashSet;
+
+use crate::cyclotomic::gaussint::GaussInt;
+#[cfg(feature = "cyclotomic_intersect")]
+use crate::cyclotomic::geometry::intersect;
+use crate::cyclotomic::{IsComplex, IsRingOrField, SymNum, Units};
+use crate::intgeom::angles::normalize_angle;
+use crate::intgeom::rat::{lex_min_rot, Rat};
+use crate::intgeom::snake::Snake;
+
+pub(crate) struct GlueSite {
+    pub(crate) counter: usize,
+    pub(crate) norm_start: usize,
+    pub(crate) match_len: usize,
+    pub(crate) norm_end: usize,
+}
+
+pub trait PatchPos: Copy + Clone + Eq + std::hash::Hash + 'static {
+    fn zero() -> Self;
+    fn add_unit(&self, dir: i8) -> Self;
+    fn sub_pos(&self, other: &Self) -> Self;
+    fn direction_of(&self) -> i8;
+    fn turn() -> i8;
+}
+
+fn scale_ratio(r: &num_rational::Ratio<i64>, scaling_fac: i64) -> i32 {
+    (*r.numer() * (scaling_fac / *r.denom())) as i32
+}
+
+fn build_scaled_table<T, const N: usize>(scaling_fac: i64) -> Vec<[i32; N]>
+where
+    T: Units + SymNum<Scalar = GaussInt<num_rational::Ratio<i64>>>,
+{
+    (0..T::turn())
+        .map(|d| {
+            let unit = T::unit(d);
+            let mut arr = [0i32; N];
+            let mut idx = 0;
+            for gi in unit.zz_coeffs() {
+                arr[idx] = scale_ratio(&gi.real, scaling_fac);
+                arr[idx + 1] = scale_ratio(&gi.imag, scaling_fac);
+                idx += 2;
+            }
+            arr
+        })
+        .collect()
+}
+
+macro_rules! define_pos {
+    ($name:ident, $n:expr) => {
+        #[derive(Copy, Clone, PartialEq, Eq, std::hash::Hash, Debug, Default)]
+        pub struct $name([i32; $n]);
+    };
+}
+
+define_pos!(Pos2, 2);
+define_pos!(Pos4, 4);
+define_pos!(Pos8, 8);
+
+macro_rules! impl_patch_pos {
+    ($pos:ty, $n:expr, $zz:ty, $sf:expr) => {
+        impl $pos {
+            fn scaled_unit_table() -> &'static Vec<[i32; $n]> {
+                static TABLE: OnceLock<Vec<[i32; $n]>> = OnceLock::new();
+                TABLE.get_or_init(|| build_scaled_table::<$zz, $n>($sf))
+            }
+        }
+
+        impl PatchPos for $pos {
+            fn zero() -> Self {
+                Self([0i32; $n])
+            }
+
+            fn add_unit(&self, dir: i8) -> Self {
+                let tab = Self::scaled_unit_table();
+                let mut r = self.0;
+                let step = &tab[dir.rem_euclid(<$zz as SymNum>::turn()) as usize];
+                for i in 0..$n {
+                    r[i] += step[i];
+                }
+                Self(r)
+            }
+
+            fn sub_pos(&self, other: &Self) -> Self {
+                let mut r = self.0;
+                for i in 0..$n {
+                    r[i] -= other.0[i];
+                }
+                Self(r)
+            }
+
+            fn direction_of(&self) -> i8 {
+                let tab = Self::scaled_unit_table();
+                for d in 0..tab.len() {
+                    if self.0 == tab[d] {
+                        return d as i8;
+                    }
+                }
+                panic!("direction_of: not a unit step");
+            }
+
+            fn turn() -> i8 {
+                <$zz as SymNum>::turn()
+            }
+        }
+    };
+}
+
+impl_patch_pos!(Pos2, 2, crate::cyclotomic::ZZ4, 1);
+impl_patch_pos!(Pos4, 4, crate::cyclotomic::ZZ12, 2);
+impl_patch_pos!(Pos8, 8, crate::cyclotomic::ZZ10, 8);
+
+#[cfg(not(feature = "cyclotomic_intersect"))]
+#[derive(Copy, Clone, Debug)]
+struct SR(i64, i64);
+
+#[cfg(not(feature = "cyclotomic_intersect"))]
+impl SR {
+    fn mul(self, o: Self) -> Self {
+        SR(self.0 * o.0 + 3 * self.1 * o.1, self.0 * o.1 + self.1 * o.0)
+    }
+    fn add(self, o: Self) -> Self {
+        SR(self.0 + o.0, self.1 + o.1)
+    }
+    fn is_zero(self) -> bool {
+        self.0 == 0 && self.1 == 0
+    }
+    fn is_positive(self) -> bool {
+        sign_sqrt3(self.0, self.1) == std::cmp::Ordering::Greater
+    }
+}
+
+#[cfg(not(feature = "cyclotomic_intersect"))]
+fn sign_sqrt3(a: i64, b: i64) -> std::cmp::Ordering {
+    if b == 0 {
+        return a.cmp(&0);
+    }
+    if a == 0 {
+        return b.cmp(&0);
+    }
+    let sa = a.cmp(&0);
+    let sb = b.cmp(&0);
+    if sa == sb {
+        return sa;
+    }
+    let aa = a.unsigned_abs();
+    let bb = b.unsigned_abs();
+    match (aa * aa).cmp(&(3 * bb * bb)) {
+        std::cmp::Ordering::Greater => sa,
+        std::cmp::Ordering::Less => sb,
+        std::cmp::Ordering::Equal => std::cmp::Ordering::Equal,
+    }
+}
+
+#[cfg(not(feature = "cyclotomic_intersect"))]
+fn re4(p: Pos4) -> SR {
+    SR(i64::from(p.0[0]), i64::from(p.0[2]))
+}
+
+#[cfg(not(feature = "cyclotomic_intersect"))]
+fn im4(p: Pos4) -> SR {
+    SR(i64::from(p.0[1]), i64::from(p.0[3]))
+}
+
+#[cfg(not(feature = "cyclotomic_intersect"))]
+fn wedge4(p1: Pos4, p2: Pos4) -> SR {
+    re4(p1).mul(im4(p2)).sub(im4(p1).mul(re4(p2)))
+}
+
+#[cfg(not(feature = "cyclotomic_intersect"))]
+fn dot4(p1: Pos4, p2: Pos4) -> SR {
+    re4(p1).mul(re4(p2)).add(im4(p1).mul(im4(p2)))
+}
+
+#[cfg(not(feature = "cyclotomic_intersect"))]
+impl SR {
+    fn sub(self, o: Self) -> Self {
+        SR(self.0 - o.0, self.1 - o.1)
+    }
+}
+
+#[cfg(not(feature = "cyclotomic_intersect"))]
+fn is_ccw4(p: Pos4, a: Pos4, b: Pos4) -> bool {
+    wedge4(a.sub_pos(&p), b.sub_pos(&p)).is_positive()
+}
+
+#[cfg(not(feature = "cyclotomic_intersect"))]
+fn is_between4(p: Pos4, a: Pos4, b: Pos4) -> bool {
+    let v = a.sub_pos(&p);
+    let w = p.sub_pos(&b);
+    wedge4(v, w).is_zero() && dot4(v, w).is_positive()
+}
+
+#[cfg(not(feature = "cyclotomic_intersect"))]
+fn intersect4(a: Pos4, b: Pos4, c: Pos4, d: Pos4) -> bool {
+    if a == c || a == d || b == c || b == d {
+        return false;
+    }
+    let l_ab_0 = im4(a.sub_pos(&b));
+    let l_ab_1 = re4(b.sub_pos(&a));
+    let l_ab_2 = wedge4(a, b);
+    let colinear_c = l_ab_0
+        .mul(re4(c))
+        .add(l_ab_1.mul(im4(c)))
+        .add(l_ab_2)
+        .is_zero();
+    let colinear_d = l_ab_0
+        .mul(re4(d))
+        .add(l_ab_1.mul(im4(d)))
+        .add(l_ab_2)
+        .is_zero();
+    if colinear_c && colinear_d {
+        is_between4(c, a, b) || is_between4(d, a, b)
+    } else {
+        is_ccw4(a, c, d) != is_ccw4(b, c, d) && is_ccw4(a, b, c) != is_ccw4(a, b, d)
+    }
+}
+
+#[cfg(not(feature = "cyclotomic_intersect"))]
+fn check_new_segments_cross_existing_pos4(
+    new_points: &[Pos4],
+    parent_positions: &[Pos4],
+    n: usize,
+    ns: usize,
+    ml: usize,
+) -> bool {
+    let num_new = new_points.len();
+    if num_new < 2 {
+        return false;
+    }
+    let x_len = n - ml + 1;
+    for ni in 0..(num_new - 1) {
+        let na = new_points[ni];
+        let nb = new_points[ni + 1];
+        for xi in 0..(x_len - 1) {
+            let ei = (ns + ml + xi) % n;
+            let ea = parent_positions[ei];
+            let eb = parent_positions[(ei + 1) % n];
+            if intersect4(na, nb, ea, eb) {
+                return true;
+            }
+        }
+    }
+    false
+}
+
+pub trait HasPatchPos: IsComplex + IsRingOrField + Units {
+    type Pos: PatchPos;
+    fn check_segment_crossings(
+        new_points: &[Self::Pos],
+        parent_positions: &[Self::Pos],
+        n: usize,
+        ns: usize,
+        ml: usize,
+    ) -> bool;
+    fn needs_snake_validation() -> bool {
+        true
+    }
+}
+
+impl HasPatchPos for crate::cyclotomic::ZZ4 {
+    type Pos = Pos2;
+    fn check_segment_crossings(
+        _new_points: &[Self::Pos],
+        _parent_positions: &[Self::Pos],
+        _n: usize,
+        _ns: usize,
+        _ml: usize,
+    ) -> bool {
+        false
+    }
+    fn needs_snake_validation() -> bool {
+        false
+    }
+}
+
+impl HasPatchPos for crate::cyclotomic::ZZ10 {
+    type Pos = Pos8;
+    fn check_segment_crossings(
+        _new_points: &[Self::Pos],
+        _parent_positions: &[Self::Pos],
+        _n: usize,
+        _ns: usize,
+        _ml: usize,
+    ) -> bool {
+        false
+    }
+}
+
+#[cfg(feature = "cyclotomic_intersect")]
+impl PatchPos for crate::cyclotomic::ZZ12 {
+    fn zero() -> Self {
+        <Self as num_traits::Zero>::zero()
+    }
+    fn add_unit(&self, dir: i8) -> Self {
+        *self + Self::unit(dir)
+    }
+    fn sub_pos(&self, other: &Self) -> Self {
+        *self - *other
+    }
+    fn direction_of(&self) -> i8 {
+        for d in 0..<Self as SymNum>::turn() {
+            if *self == Self::unit(d) {
+                return d;
+            }
+        }
+        panic!("direction_of: not a unit step");
+    }
+    fn turn() -> i8 {
+        <Self as SymNum>::turn()
+    }
+}
+
+#[cfg(feature = "cyclotomic_intersect")]
+fn check_segments_cross_cyclotomic(
+    new_points: &[crate::cyclotomic::ZZ12],
+    parent_positions: &[crate::cyclotomic::ZZ12],
+    n: usize,
+    ns: usize,
+    ml: usize,
+) -> bool {
+    let num_new = new_points.len();
+    if num_new < 2 {
+        return false;
+    }
+    let x_len = n - ml + 1;
+    for ni in 0..(num_new - 1) {
+        let na = new_points[ni];
+        let nb = new_points[ni + 1];
+        for xi in 0..(x_len - 1) {
+            let ei = (ns + ml + xi) % n;
+            let ea = parent_positions[ei];
+            let eb = parent_positions[(ei + 1) % n];
+            if intersect(&(na, nb), &(ea, eb)) {
+                return true;
+            }
+        }
+    }
+    false
+}
+
+#[cfg(feature = "cyclotomic_intersect")]
+impl HasPatchPos for crate::cyclotomic::ZZ12 {
+    type Pos = Self;
+    fn check_segment_crossings(
+        new_points: &[Self::Pos],
+        parent_positions: &[Self::Pos],
+        n: usize,
+        ns: usize,
+        ml: usize,
+    ) -> bool {
+        check_segments_cross_cyclotomic(new_points, parent_positions, n, ns, ml)
+    }
+    fn needs_snake_validation() -> bool {
+        false
+    }
+}
+
+#[cfg(not(feature = "cyclotomic_intersect"))]
+impl HasPatchPos for crate::cyclotomic::ZZ12 {
+    type Pos = Pos4;
+    fn check_segment_crossings(
+        new_points: &[Self::Pos],
+        parent_positions: &[Self::Pos],
+        n: usize,
+        ns: usize,
+        ml: usize,
+    ) -> bool {
+        check_new_segments_cross_existing_pos4(new_points, parent_positions, n, ns, ml)
+    }
+    fn needs_snake_validation() -> bool {
+        false
+    }
+}
+
+fn trace_positions_from<P: PatchPos>(start: P, start_dir: i8, angles: &[i8]) -> Vec<P> {
+    let mut positions = vec![start];
+    let mut dir = start_dir;
+    for &a in angles {
+        dir = (dir as i64 + a as i64).rem_euclid(P::turn() as i64) as i8;
+        let last = *positions.last().unwrap();
+        positions.push(last.add_unit(dir));
+    }
+    positions
+}
+
+pub struct GrowingPatch<T: HasPatchPos> {
+    angles: Vec<i8>,
+    counters: Vec<usize>,
+    next_counter: usize,
+    cached_rat: Option<Rat<T>>,
+    positions: Vec<T::Pos>,
+    visited: FxHashSet<T::Pos>,
+    _phantom: PhantomData<T>,
+}
+
+impl<T: HasPatchPos> GrowingPatch<T> {
+    pub fn len(&self) -> usize {
+        self.angles.len()
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.angles.len() == 0
+    }
+}
+
+impl<T: HasPatchPos> GrowingPatch<T> {
+    pub fn from_seed(seed: &Rat<T>) -> Self {
+        let canonical = seed.clone().canonical();
+        let seq = canonical.seq();
+        let n = seq.len();
+        let rat = Rat::from_slice_unchecked(seq);
+        let all_positions = trace_positions_from(T::Pos::zero(), 0, seq);
+        let positions = all_positions[..n].to_vec();
+        let visited = all_positions.into_iter().collect();
+        GrowingPatch {
+            angles: seq.to_vec(),
+            counters: (0..n).collect(),
+            next_counter: n,
+            cached_rat: Some(rat),
+            positions,
+            visited,
+            _phantom: PhantomData,
+        }
+    }
+
+    pub fn to_rat(&self) -> Rat<T> {
+        self.cached_rat
+            .clone()
+            .unwrap_or_else(|| Rat::from_slice_unchecked(&self.angles))
+    }
+
+    fn compute_match_inline(&self, ia: usize, seed_seq: &[i8], ib: usize) -> (usize, usize, usize) {
+        let n = self.angles.len();
+        let m = seed_seq.len();
+        let min_len = n.min(m);
+
+        if min_len < 2 {
+            return (ia, 1, ib);
+        }
+
+        let mut len = min_len;
+        for i in 1..min_len {
+            if self.angles[(ia + i) % n] != -seed_seq[(ib + m - i) % m] {
+                len = i;
+                break;
+            }
+        }
+
+        if self.angles[ia] != -seed_seq[ib] {
+            return (ia, len, ib);
+        }
+
+        let remaining = min_len - len;
+        for i in 1..remaining {
+            if self.angles[(ia + n - i) % n] != -seed_seq[(ib + i) % m] {
+                return (
+                    (ia as i64 - i as i64).rem_euclid(n as i64) as usize,
+                    len + i,
+                    (ib as i64 + i as i64).rem_euclid(m as i64) as usize,
+                );
+            }
+        }
+        (
+            (ia as i64 - remaining as i64).rem_euclid(n as i64) as usize,
+            len + remaining,
+            (ib as i64 + remaining as i64).rem_euclid(m as i64) as usize,
+        )
+    }
+
+    fn enumerate_sites_inline(&self, seed_seq: &[i8], min_counter: usize) -> Vec<GlueSite> {
+        let n = self.angles.len();
+        let m = seed_seq.len();
+        let mut seen: FxHashSet<(usize, usize, usize)> = FxHashSet::default();
+        let mut sites: Vec<GlueSite> = Vec::new();
+
+        for ia in 0..n {
+            if self.counters[ia] < min_counter {
+                continue;
+            }
+            for ib in 0..m {
+                let (ns, ml, ne) = self.compute_match_inline(ia, seed_seq, ib);
+                if ml == 0 || !seen.insert((ns, ml, ne)) {
+                    continue;
+                }
+                if ml >= 2 {
+                    let gap_left =
+                        self.angles[(ns + ml) % n] as i32 + seed_seq[(ne + m - ml) % m] as i32;
+                    let gap_right = self.angles[ns] as i32 + seed_seq[ne] as i32;
+                    if gap_left < 0 || gap_right < 0 {
+                        continue;
+                    }
+                } else {
+                    let left = self.angles[ia] as i32 + seed_seq[ib] as i32;
+                    let right =
+                        self.angles[(ia + 1) % n] as i32 + seed_seq[(ib + m - 1) % m] as i32;
+                    if left <= 0 || right <= 0 {
+                        continue;
+                    }
+                }
+                sites.push(GlueSite {
+                    counter: self.counters[ns],
+                    norm_start: ns,
+                    match_len: ml,
+                    norm_end: ne,
+                });
+            }
+        }
+
+        sites.sort_by_key(|s| s.counter);
+        sites
+    }
+
+    fn apply_site(&self, site: &GlueSite, seed: &Rat<T>) -> Option<GrowingPatch<T>> {
+        let seed_seq = seed.seq();
+        let m = seed_seq.len();
+        let n = self.angles.len();
+        let ml = site.match_len;
+        let ns = site.norm_start;
+        let ne = site.norm_end;
+
+        let x_len = n - ml + 1;
+        let y_len = m - ml + 1;
+
+        let x: Vec<i8> = (0..x_len).map(|i| self.angles[(ns + ml + i) % n]).collect();
+        let y: Vec<i8> = (0..y_len).map(|i| seed_seq[(ne + i) % m]).collect();
+
+        let mut new_angles: Vec<i8> = Vec::with_capacity(x_len + y_len - 2);
+        new_angles.extend_from_slice(&x[..x_len - 1]);
+        new_angles.extend_from_slice(&y[..y_len - 1]);
+
+        let a_yx = normalize_angle::<T>(x[0] + y[y_len - 1] - T::hturn());
+        let a_xy = normalize_angle::<T>(y[0] + x[x_len - 1] - T::hturn());
+        if a_yx.abs() == T::hturn() || a_xy.abs() == T::hturn() {
+            return None;
+        }
+        new_angles[0] = a_yx;
+        new_angles[x_len - 1] = a_xy;
+
+        let junction_a = self.positions[(ns + ml) % n];
+        let junction_b = self.positions[ns];
+
+        let prev_idx = (ns + n - 1) % n;
+        let step_in = self.positions[ns].sub_pos(&self.positions[prev_idx]);
+        let in_dir = step_in.direction_of();
+        let out_dir: i8 = (in_dir as i64 + a_xy as i64).rem_euclid(T::turn() as i64) as i8;
+
+        let mut trace_angles: Vec<i8> = Vec::with_capacity(y_len - 1);
+        trace_angles.push(0i8);
+        if y_len > 2 {
+            trace_angles.extend_from_slice(&y[1..y_len - 1]);
+        }
+        let new_points = trace_positions_from(junction_b, out_dir, &trace_angles);
+
+        let num_new = new_points.len();
+        for p in &new_points[1..num_new] {
+            if *p != junction_a && self.visited.contains(p) {
+                return None;
+            }
+        }
+
+        if T::check_segment_crossings(&new_points, &self.positions, n, ns, ml) {
+            return None;
+        }
+
+        let mut new_positions: Vec<T::Pos> = Vec::with_capacity(new_angles.len());
+        for i in 0..(x_len - 1) {
+            new_positions.push(self.positions[(ns + ml + i) % n]);
+        }
+        for p in &new_points[..num_new - 1] {
+            new_positions.push(*p);
+        }
+
+        let mut new_visited = self.visited.clone();
+        for p in &new_points {
+            new_visited.insert(*p);
+        }
+
+        let mut new_counters: Vec<usize> = Vec::with_capacity(new_angles.len());
+        let mut nc = self.next_counter;
+        for i in 0..(x_len - 1) {
+            let old_pos = (ns + ml + i) % n;
+            new_counters.push(self.counters[old_pos]);
+        }
+        for _ in 0..(y_len - 1) {
+            new_counters.push(nc);
+            nc += 1;
+        }
+
+        let offset = lex_min_rot(&new_angles);
+        new_angles.rotate_left(offset);
+        new_counters.rotate_left(offset);
+        new_positions.rotate_left(offset);
+
+        let cached_rat = Rat::from_canonical_angles_unchecked(new_angles.clone());
+
+        Some(GrowingPatch {
+            angles: new_angles,
+            counters: new_counters,
+            next_counter: nc,
+            cached_rat: Some(cached_rat),
+            positions: new_positions,
+            visited: new_visited,
+            _phantom: PhantomData,
+        })
+    }
+}
+
+#[derive(Default)]
+pub struct GrowStats {
+    pub enumerate_calls: usize,
+    pub enumerate_ns: u64,
+    pub apply_ns: u64,
+}
+
+impl std::fmt::Display for GrowStats {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "enumerate: {} calls, {:.2}s | apply: {:.2}s",
+            self.enumerate_calls,
+            self.enumerate_ns as f64 / 1e9,
+            self.apply_ns as f64 / 1e9,
+        )
+    }
+}
+
+struct GrowState<T: HasPatchPos> {
+    results: BTreeMap<usize, FxHashSet<Rat<T>>>,
+    stats: GrowStats,
+}
+
+pub fn grow_redelmeier<T>(seed: &Rat<T>, max_size: usize) -> BTreeMap<usize, FxHashSet<Rat<T>>>
+where
+    T: HasPatchPos,
+{
+    grow_redelmeier_inner(seed, max_size, false).0
+}
+
+pub fn grow_redelmeier_free<T>(seed: &Rat<T>, max_size: usize) -> BTreeMap<usize, FxHashSet<Rat<T>>>
+where
+    T: HasPatchPos,
+{
+    grow_redelmeier_inner(seed, max_size, true).0
+}
+
+pub fn grow_redelmeier_profiled<T>(
+    seed: &Rat<T>,
+    max_size: usize,
+) -> (BTreeMap<usize, FxHashSet<Rat<T>>>, GrowStats)
+where
+    T: HasPatchPos,
+{
+    grow_redelmeier_inner(seed, max_size, false)
+}
+
+fn grow_redelmeier_inner<T>(
+    seed: &Rat<T>,
+    max_size: usize,
+    free: bool,
+) -> (BTreeMap<usize, FxHashSet<Rat<T>>>, GrowStats)
+where
+    T: HasPatchPos,
+{
+    let mut state = GrowState {
+        results: BTreeMap::new(),
+        stats: GrowStats::default(),
+    };
+    let initial = GrowingPatch::from_seed(seed);
+    let initial_rat = initial.to_rat();
+    let stored = if free {
+        std::cmp::min(initial_rat.clone(), initial_rat.reflected())
+    } else {
+        initial_rat
+    };
+    state.results.entry(1).or_default().insert(stored);
+    grow_recursive_inner(&initial, seed, 0, 1, max_size, free, &mut state);
+    (state.results, state.stats)
+}
+
+fn grow_recursive_inner<T>(
+    patch: &GrowingPatch<T>,
+    seed: &Rat<T>,
+    min_counter: usize,
+    current_size: usize,
+    max_size: usize,
+    free: bool,
+    state: &mut GrowState<T>,
+) where
+    T: HasPatchPos,
+{
+    if current_size >= max_size {
+        return;
+    }
+
+    let seed_seq = seed.seq();
+
+    let sites = {
+        state.stats.enumerate_calls += 1;
+        let t0 = std::time::Instant::now();
+        let sites = patch.enumerate_sites_inline(seed_seq, min_counter);
+        state.stats.enumerate_ns += t0.elapsed().as_nanos() as u64;
+        sites
+    };
+
+    for site in &sites {
+        let t0 = std::time::Instant::now();
+        let new_patch = match patch.apply_site(site, seed) {
+            Some(p) => p,
+            None => {
+                state.stats.apply_ns += t0.elapsed().as_nanos() as u64;
+                continue;
+            }
+        };
+        let rat = new_patch.to_rat();
+
+        if T::needs_snake_validation() && Snake::<T>::try_from(rat.seq()).is_err() {
+            state.stats.apply_ns += t0.elapsed().as_nanos() as u64;
+            continue;
+        }
+
+        state.stats.apply_ns += t0.elapsed().as_nanos() as u64;
+
+        let new_size = current_size + 1;
+        let stored = if free {
+            std::cmp::min(rat.clone(), rat.reflected())
+        } else {
+            rat
+        };
+        if state.results.entry(new_size).or_default().insert(stored) {
+            grow_recursive_inner(
+                &new_patch,
+                seed,
+                site.counter,
+                new_size,
+                max_size,
+                free,
+                state,
+            );
+        }
+    }
+}
+
+pub fn make_free<T>(onesided: &FxHashSet<Rat<T>>) -> FxHashSet<Rat<T>>
+where
+    T: HasPatchPos,
+{
+    onesided
+        .iter()
+        .map(|r| std::cmp::min(r.clone(), r.reflected()))
+        .collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    #[cfg(not(feature = "cyclotomic_intersect"))]
+    use crate::cyclotomic::geometry::intersect;
+    #[cfg(not(feature = "cyclotomic_intersect"))]
+    use crate::cyclotomic::OneImag;
+    use crate::cyclotomic::{ZZ12, ZZ4};
+    use crate::intgeom::tiles;
+    #[cfg(not(feature = "cyclotomic_intersect"))]
+    use num_traits::{One, Zero};
+
+    #[cfg(not(feature = "cyclotomic_intersect"))]
+    fn zz12_to_pos4(t: ZZ12) -> Pos4 {
+        let mut arr = [0i32; 4];
+        let mut idx = 0;
+        for gi in t.zz_coeffs() {
+            arr[idx] = scale_ratio(&gi.real, 2);
+            arr[idx + 1] = scale_ratio(&gi.imag, 2);
+            idx += 2;
+        }
+        Pos4(arr)
+    }
+
+    #[cfg(not(feature = "cyclotomic_intersect"))]
+    fn build_patch_positions(angles: &[i8]) -> Vec<ZZ12> {
+        let mut positions = vec![ZZ12::zero()];
+        let mut dir: i8 = 0;
+        for &a in angles {
+            dir = (dir as i64 + a as i64).rem_euclid(12) as i8;
+            let last = *positions.last().unwrap();
+            positions.push(last + ZZ12::unit(dir));
+        }
+        positions
+    }
+
+    #[cfg(not(feature = "cyclotomic_intersect"))]
+    #[test]
+    fn segment_intersect_matches_cyclotomic() {
+        let cases: Vec<(ZZ12, ZZ12, ZZ12, ZZ12)> = vec![
+            (ZZ12::zero(), ZZ12::one(), ZZ12::from(2), ZZ12::from(3)),
+            (ZZ12::zero(), ZZ12::one(), ZZ12::one(), ZZ12::from(2)),
+            (ZZ12::zero(), ZZ12::from(2), ZZ12::one(), ZZ12::from(3)),
+            (
+                ZZ12::zero(),
+                ZZ12::one_i(),
+                ZZ12::one(),
+                ZZ12::one() + ZZ12::one_i(),
+            ),
+            (
+                ZZ12::zero(),
+                ZZ12::one() + ZZ12::one_i(),
+                ZZ12::one(),
+                ZZ12::one_i(),
+            ),
+        ];
+        for (a, b, c, d) in &cases {
+            let expected = intersect(&(*a, *b), &(*c, *d));
+            let got = intersect4(
+                zz12_to_pos4(*a),
+                zz12_to_pos4(*b),
+                zz12_to_pos4(*c),
+                zz12_to_pos4(*d),
+            );
+            assert_eq!(
+                got, expected,
+                "intersect({a:?}, {b:?}, {c:?}, {d:?}): expected {expected}, got {got}"
+            );
+        }
+    }
+
+    #[cfg(not(feature = "cyclotomic_intersect"))]
+    #[test]
+    fn segment_intersect_fuzz_against_cyclotomic() {
+        let hex_angles: &[i8] = &[2, 2, 2, 2, 2, 2];
+        let spectre_angles: &[i8] = &[3, 2, 0, 2, -3, 2, 3, 2, -3, 2, 3, -2, 3, -2];
+        for angles in [hex_angles, spectre_angles] {
+            let positions = build_patch_positions(angles);
+            let n = positions.len() - 1;
+            for i in 0..n {
+                for k in 0..n {
+                    let a = positions[i];
+                    let b = positions[(i + 1) % (n + 1)];
+                    let c = positions[k];
+                    let d = positions[(k + 1) % (n + 1)];
+                    let expected = intersect(&(a, b), &(c, d));
+                    let got = intersect4(
+                        zz12_to_pos4(a),
+                        zz12_to_pos4(b),
+                        zz12_to_pos4(c),
+                        zz12_to_pos4(d),
+                    );
+                    assert_eq!(
+                        got, expected,
+                        "angles={angles:?} seg({i},{}) vs seg({k},{}): expected {expected}, got {got}",
+                        (i+1) % (n+1), (k+1) % (n+1)
+                    );
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn hexagon_matches_old_approach_size4() {
+        let seed: Rat<ZZ12> = Rat::from_unchecked(&tiles::hexagon());
+        let new_results = grow_redelmeier(&seed, 4);
+
+        let mut old_results: BTreeMap<usize, FxHashSet<Rat<ZZ12>>> = BTreeMap::new();
+        old_results.insert(1, std::iter::once(seed.clone()).collect());
+
+        for k in 2..=4 {
+            let prev: Vec<Rat<ZZ12>> = old_results[&(k - 1)].iter().cloned().collect();
+            let count_a = prev.len();
+            let mut all_tiles = prev;
+            all_tiles.push(seed.clone());
+            let ts = crate::intgeom::tileset::TileSet::new(all_tiles);
+            let pairs: Vec<(usize, usize)> = (0..count_a).map(|i| (i, count_a)).collect();
+            let (results, _) = ts.valid_rats_for_pairs(&pairs);
+            old_results.insert(k, results.into_iter().collect());
+        }
+
+        for k in 1..=4 {
+            let old_count = old_results.get(&k).map(|s| s.len()).unwrap_or(0);
+            let new_count = new_results.get(&k).map(|s| s.len()).unwrap_or(0);
+            assert_eq!(
+                old_count, new_count,
+                "size {k}: old={old_count} new={new_count}"
+            );
+            if let (Some(old_set), Some(new_set)) = (old_results.get(&k), new_results.get(&k)) {
+                assert_eq!(
+                    old_set.len(),
+                    new_set.len(),
+                    "size {k}: sets differ in length"
+                );
+                for r in old_set.iter() {
+                    assert!(new_set.contains(r), "size {k}: missing {r}");
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn spectre_matches_old_approach_size3() {
+        let seed: Rat<ZZ12> = Rat::from_unchecked(&tiles::spectre());
+        let new_results = grow_redelmeier(&seed, 3);
+
+        let mut old_results: BTreeMap<usize, FxHashSet<Rat<ZZ12>>> = BTreeMap::new();
+        old_results.insert(1, std::iter::once(seed.clone()).collect());
+
+        for k in 2..=3 {
+            let prev: Vec<Rat<ZZ12>> = old_results[&(k - 1)].iter().cloned().collect();
+            let count_a = prev.len();
+            let mut all_tiles = prev;
+            all_tiles.push(seed.clone());
+            let ts = crate::intgeom::tileset::TileSet::new(all_tiles);
+            let pairs: Vec<(usize, usize)> = (0..count_a).map(|i| (i, count_a)).collect();
+            let (results, _) = ts.valid_rats_for_pairs(&pairs);
+            old_results.insert(k, results.into_iter().collect());
+        }
+
+        for k in 1..=3 {
+            let old_count = old_results.get(&k).map(|s| s.len()).unwrap_or(0);
+            let new_count = new_results.get(&k).map(|s| s.len()).unwrap_or(0);
+            assert_eq!(
+                old_count, new_count,
+                "size {k}: old={old_count} new={new_count}"
+            );
+            if let (Some(old_set), Some(new_set)) = (old_results.get(&k), new_results.get(&k)) {
+                assert_eq!(
+                    old_set.len(),
+                    new_set.len(),
+                    "size {k}: sets differ in length"
+                );
+                for r in old_set.iter() {
+                    assert!(new_set.contains(r), "size {k}: missing {r}");
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn square_zz4_matches_old_approach_size6() {
+        let seed: Rat<ZZ4> = Rat::from_unchecked(&tiles::square());
+        let new_results = grow_redelmeier(&seed, 6);
+
+        let mut old_results: BTreeMap<usize, FxHashSet<Rat<ZZ4>>> = BTreeMap::new();
+        old_results.insert(1, std::iter::once(seed.clone()).collect());
+
+        for k in 2..=6 {
+            let prev: Vec<Rat<ZZ4>> = old_results[&(k - 1)].iter().cloned().collect();
+            let count_a = prev.len();
+            let mut all_tiles = prev;
+            all_tiles.push(seed.clone());
+            let ts = crate::intgeom::tileset::TileSet::new(all_tiles);
+            let pairs: Vec<(usize, usize)> = (0..count_a).map(|i| (i, count_a)).collect();
+            let (results, _) = ts.valid_rats_for_pairs(&pairs);
+            old_results.insert(k, results.into_iter().collect());
+        }
+
+        for k in 1..=6 {
+            let old_count = old_results.get(&k).map(|s| s.len()).unwrap_or(0);
+            let new_count = new_results.get(&k).map(|s| s.len()).unwrap_or(0);
+            assert_eq!(
+                old_count, new_count,
+                "size {k}: old={old_count} new={new_count}"
+            );
+            if let (Some(old_set), Some(new_set)) = (old_results.get(&k), new_results.get(&k)) {
+                assert_eq!(
+                    old_set.len(),
+                    new_set.len(),
+                    "size {k}: sets differ in length"
+                );
+                for r in old_set.iter() {
+                    assert!(new_set.contains(r), "size {k}: missing {r}");
+                }
+            }
+        }
+    }
+
+    fn brute_force_grow<T: HasPatchPos>(
+        seed: &Rat<T>,
+        max_size: usize,
+    ) -> BTreeMap<usize, FxHashSet<Rat<T>>> {
+        let mut results: BTreeMap<usize, FxHashSet<Rat<T>>> = BTreeMap::new();
+        results.insert(1, std::iter::once(seed.clone()).collect());
+
+        for k in 2..=max_size {
+            let prev: Vec<Rat<T>> = results[&(k - 1)].iter().cloned().collect();
+            let mut next: FxHashSet<Rat<T>> = FxHashSet::default();
+            for patch in &prev {
+                for ia in 0..patch.len() {
+                    for ib in 0..seed.len() {
+                        if let Ok(glued) = patch.try_glue((ia as i64, ib as i64), seed) {
+                            next.insert(glued);
+                        }
+                    }
+                }
+            }
+            results.insert(k, next);
+        }
+        results
+    }
+
+    #[test]
+    fn square_zz4_brute_force_vs_tileset_size7() {
+        let seed: Rat<ZZ4> = Rat::from_unchecked(&tiles::square());
+        let bf = brute_force_grow(&seed, 7);
+
+        eprintln!("=== Brute force (try_glue) ===");
+        for k in 1..=7 {
+            let count = bf.get(&k).map(|s| s.len()).unwrap_or(0);
+            eprintln!("  size {k}: {count}");
+        }
+
+        let redel = grow_redelmeier(&seed, 7);
+        eprintln!("=== Redelmeier (TileSet) ===");
+        for k in 1..=7 {
+            let count = redel.get(&k).map(|s| s.len()).unwrap_or(0);
+            eprintln!("  size {k}: {count}");
+        }
+
+        for k in 1..=7 {
+            let bf_count = bf.get(&k).map(|s| s.len()).unwrap_or(0);
+            let redel_count = redel.get(&k).map(|s| s.len()).unwrap_or(0);
+            assert_eq!(
+                bf_count, redel_count,
+                "size {k}: brute_force={bf_count} redelmeier={redel_count}"
+            );
+            if let (Some(bf_set), Some(redel_set)) = (bf.get(&k), redel.get(&k)) {
+                for r in bf_set.iter() {
+                    assert!(
+                        redel_set.contains(r),
+                        "size {k}: brute_force patch missing from redelmeier: {:?}",
+                        r.seq()
+                    );
+                }
+                for r in redel_set.iter() {
+                    assert!(
+                        bf_set.contains(r),
+                        "size {k}: redelmeier patch missing from brute_force: {:?}",
+                        r.seq()
+                    );
+                }
+            }
+        }
+    }
+
+    const FREE_POLYOMINOES_NO_HOLES: &[usize] = &[
+        1, 1, 1, 2, 5, 12, 35, 107, 363, 1248, 4460, 16094, 58937, 217117,
+    ];
+
+    #[test]
+    fn square_zz4_free_polyominoes_match_oeis() {
+        let max_size: usize = 9;
+        let seed: Rat<ZZ4> = Rat::from_unchecked(&tiles::square());
+        let onesided = grow_redelmeier(&seed, max_size);
+
+        for (k, &expected) in FREE_POLYOMINOES_NO_HOLES
+            .iter()
+            .enumerate()
+            .take(max_size + 1)
+            .skip(1)
+        {
+            let onesided_set = onesided.get(&k).unwrap();
+            let free_set = make_free(onesided_set);
+            let free_count = free_set.len();
+            assert_eq!(
+                free_count, expected,
+                "size {k}: got {free_count}, expected {expected} free polyominoes (no holes)"
+            );
+        }
+    }
+
+    #[test]
+    fn grow_redelmeier_free_matches_oeis() {
+        let max_size: usize = 9;
+        let seed: Rat<ZZ4> = Rat::from_unchecked(&tiles::square());
+        let free_results = grow_redelmeier_free(&seed, max_size);
+
+        for (k, &expected) in FREE_POLYOMINOES_NO_HOLES
+            .iter()
+            .enumerate()
+            .take(max_size + 1)
+            .skip(1)
+        {
+            let free_count = free_results.get(&k).map(|s| s.len()).unwrap_or(0);
+            assert_eq!(
+                free_count, expected,
+                "size {k}: got {free_count}, expected {expected} free polyominoes (no holes)"
+            );
+        }
+    }
+}
diff --git a/src/intgeom/mod.rs b/src/intgeom/mod.rs
index 7057cd2..f819ef5 100644
--- a/src/intgeom/mod.rs
+++ b/src/intgeom/mod.rs
@@ -9,3 +9,7 @@ pub mod snake;
 pub mod rat;
 
 pub mod tiles;
+
+pub mod tileset;
+
+pub mod growing;
diff --git a/src/intgeom/rat.rs b/src/intgeom/rat.rs
index 3e900f0..dc049b4 100644
--- a/src/intgeom/rat.rs
+++ b/src/intgeom/rat.rs
@@ -15,7 +15,7 @@ use super::snake::{Snake, Turtle};
 ///
 /// See <https://en.wikipedia.org/wiki/Lexicographically_minimal_string_rotation>
 ///
-fn lex_min_rot<T: Eq + Ord>(s: &[T]) -> usize {
+pub fn lex_min_rot<T: Eq + Ord>(s: &[T]) -> usize {
     let n = s.len() as isize;
     let mut f: Vec<isize> = vec![-1; 2 * s.len()];
     let mut k: isize = 0;
@@ -61,7 +61,7 @@ fn prepare_seq(angles: &[i8]) -> (Vec<i8>, usize) {
 /// (assuming that we start to match both at index 0)
 /// and possibly a offset to be subtracted from the left side
 /// (if the match can be extended to the left, assuming the sequences are cyclic)
-fn match_length(x: &[i8], y: &[i8]) -> (usize, usize) {
+pub(crate) fn match_length(x: &[i8], y: &[i8]) -> (usize, usize) {
     let min_len = x.len().min(y.len());
     if min_len < 2 {
         // at least one sequence has
@@ -70,7 +70,7 @@ fn match_length(x: &[i8], y: &[i8]) -> (usize, usize) {
     }
 
     // we start checking at the 2nd point and look for the right side end
-    let mut len = 1;
+    let mut len = min_len;
     for i in 1..min_len {
         if x[i] != y[i] {
             // first non-complementary angle -> right end of match
@@ -84,15 +84,20 @@ fn match_length(x: &[i8], y: &[i8]) -> (usize, usize) {
     }
 
     // go cyclically into the other direction to try extending the interval
-    let mut offset = 0;
-    for i in 1..(min_len - len) {
+    let remaining = min_len - len;
+    if remaining == 0 {
+        return (len, 0);
+    }
+    let mut offset = remaining;
+    for i in 1..remaining {
         if x[x.len() - i] != y[y.len() - i] {
             // first non-complementary angle -> left end of match
             offset = i;
             len += i;
-            break;
+            return (len, offset);
         }
     }
+    len += offset;
     (len, offset)
 }
 
@@ -156,6 +161,24 @@ impl<T: IsComplex + IsRingOrField + Units> Rat<T> {
         }
     }
 
+    /// Create a rat from an angle sequence.
+    /// Caller guarantees angles are already in canonical (lex-min) rotation.
+    /// This skips normalization for performance - only use if you're certain!
+    #[doc(hidden)]
+    pub fn from_canonical_angles_unchecked(angles: Vec<i8>) -> Self {
+        let _n = angles.len();
+        let angle_sum: i64 = angles.iter().map(|x| *x as i64).sum();
+        let mut seq = angles;
+        let seq2 = seq.clone();
+        seq.extend_from_slice(&seq2);
+        Self {
+            phantom: PhantomData,
+            angles: seq,
+            angle_sum,
+            cyc: 0,
+        }
+    }
+
     /// Create a rat from a snake.
     /// Assumes that the sequence describes a valid simple polygon.
     pub fn from_unchecked(snake: &Snake<T>) -> Self {
@@ -225,7 +248,7 @@ impl<T: IsComplex + IsRingOrField + Units> Rat<T> {
 
     /// Shift the starting node of the tile (does not affect equivalence).
     pub fn cycle(mut self, offset: i64) -> Self {
-        self.cyc = ((self.cyc as i64 + offset) % (self.len() as i64)) as usize;
+        self.cyc = (self.cyc as i64 + offset).rem_euclid(self.len() as i64) as usize;
         self
     }
 
@@ -246,7 +269,7 @@ impl<T: IsComplex + IsRingOrField + Units> Rat<T> {
     /// Return a slice of the angle sequence with length n.
     pub fn slice_from(&self, start: i64, len: usize) -> &[i8] {
         self.slice_from_canonical(
-            ((self.cyc as i64 + start) % self.len() as i64) as usize,
+            (self.cyc as i64 + start).rem_euclid(self.len() as i64) as usize,
             len,
         )
     }
@@ -285,7 +308,7 @@ impl<T: IsComplex + IsRingOrField + Units> Rat<T> {
         let (len, offset) = match_length(x_seq, y_seq.as_slice());
 
         let ext_start: i64 = (self_start - offset as i64).rem_euclid(self.len() as i64);
-        let ext_end: i64 = (other_end + offset as i64).rem_euclid(self.len() as i64);
+        let ext_end: i64 = (other_end + offset as i64).rem_euclid(other.len() as i64);
         (ext_start, len, ext_end)
     }
 
@@ -312,18 +335,15 @@ impl<T: IsComplex + IsRingOrField + Units> Rat<T> {
     /// point twice. Thus the existing checks and implementation details already
     /// ensure holefreeness.
     /// QED.
-    fn try_glue_impl(
+    fn try_glue_with_match_info(
         &self,
-        matched_indices: (i64, i64),
+        (norm_start, mlen, norm_end): (i64, usize, i64),
         other: &Self,
         unchecked: bool,
     ) -> Result<Self, &str> {
         if self.chirality() != other.chirality() {
-            // NOTE: could implicitly reflect a tile and adjust the denoted index,
-            //       but this is very low priority right now.
             return Err("Cannot glue rats of opposite chirality (not implemented)!");
         }
-        let (norm_start, mlen, norm_end) = self.get_match(matched_indices, other);
 
         // get boundaries without the match (but keep the endpoints)
         let x = self.slice_from(norm_start + mlen as i64, self.len() - mlen + 1);
@@ -334,11 +354,18 @@ impl<T: IsComplex + IsRingOrField + Units> Rat<T> {
         glued_seq.extend_from_slice(&y[..y.len() - 1]);
 
         // fix glued vertex angles at the transition of the boundaries
-        let a_yx = x[0] + y[y.len() - 1] - T::hturn();
-        let a_xy = y[0] + x[x.len() - 1] - T::hturn();
+        use super::angles::normalize_angle;
+
+        let a_yx = normalize_angle::<T>(x[0] + y[y.len() - 1] - T::hturn());
+        let a_xy = normalize_angle::<T>(y[0] + x[x.len() - 1] - T::hturn());
         glued_seq[0] = a_yx;
         glued_seq[x.len() - 1] = a_xy;
 
+        // a ±hturn junction angle means the path backtracks (degenerate glue)
+        if a_yx.abs() == T::hturn() || a_xy.abs() == T::hturn() {
+            return Err("Glue produces degenerate ±hturn junction angle");
+        }
+
         if unchecked {
             Ok(Self::from_slice_unchecked(glued_seq.as_slice()))
         } else {
@@ -347,7 +374,17 @@ impl<T: IsComplex + IsRingOrField + Units> Rat<T> {
     }
 
     pub fn try_glue(&self, matched_indices: (i64, i64), other: &Self) -> Result<Self, &str> {
-        self.try_glue_impl(matched_indices, other, false)
+        let match_info = self.get_match(matched_indices, other);
+        self.try_glue_with_match_info(match_info, other, false)
+    }
+
+    pub(crate) fn try_glue_precomputed(
+        &self,
+        match_info: (i64, usize, i64),
+        other: &Self,
+        unchecked: bool,
+    ) -> Result<Self, &str> {
+        self.try_glue_with_match_info(match_info, other, unchecked)
     }
 
     pub fn glue(&self, matched_indices: (i64, i64), other: &Self) -> Self {
@@ -355,7 +392,9 @@ impl<T: IsComplex + IsRingOrField + Units> Rat<T> {
     }
 
     pub fn glue_unchecked(&self, matched_indices: (i64, i64), other: &Self) -> Self {
-        self.try_glue_impl(matched_indices, other, true).unwrap()
+        let match_info = self.get_match(matched_indices, other);
+        self.try_glue_with_match_info(match_info, other, true)
+            .unwrap()
     }
 
     // ----
@@ -389,6 +428,13 @@ impl<T: IsComplex + IsRingOrField + Units> Ord for Rat<T> {
         self.angles.cmp(&other.angles)
     }
 }
+
+impl<T: IsComplex + IsRingOrField + Units> std::hash::Hash for Rat<T> {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.angles.hash(state);
+    }
+}
+
 impl<T: IsComplex + IsRingOrField + Units> Display for Rat<T> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         self.seq().fmt(f)
@@ -415,8 +461,8 @@ mod tests {
         assert_eq!(match_length(&[1, 2, 3], &[]), (0, 0));
         assert_eq!(match_length(&[1, 2, 3], &[1]), (0, 0));
 
-        // match cannot be longer than shortest sequence
-        assert_eq!(match_length(&[1, 2, 3], &[1, 2]), (1, 0));
+        // full match up to shorter sequence
+        assert_eq!(match_length(&[1, 2, 3], &[1, 2]), (2, 0));
         // the end points do not matter
         assert_eq!(match_length(&[1, 2, 3], &[4, 5]), (1, 0));
         // the inside points must be equal
@@ -424,6 +470,12 @@ mod tests {
         assert_eq!(match_length(&[1, 2, 3, 4], &[5, 2, 3, 6]), (3, 0));
         // match can be extended to the left (input is starting in the middle)
         assert_eq!(match_length(&[3, 4, 1, 2], &[3, 6, 5, 2]), (3, 2));
+        // full match: all positions equal
+        assert_eq!(match_length(&[1, 2, 3], &[1, 2, 3]), (3, 0));
+        // full match with backward extension
+        assert_eq!(match_length(&[3, 4, 1, 2], &[3, 4, 1, 2]), (4, 0));
+        // backward extension that consumes all remaining elements
+        assert_eq!(match_length(&[3, 4, 1, 2], &[3, 6, 1, 2]), (4, 3),);
     }
 
     #[test]
@@ -516,6 +568,12 @@ mod tests {
 
         // revcomp works respecting the current start offset
         assert_eq!(r.cycled(2), r.cycled(2).reversed().reversed());
+
+        // negative offsets work correctly
+        assert_eq!(r.seq(), r.cycled(-1).cycled(1).seq());
+        assert_eq!(r.cycled(1).seq(), r.cycled(-13).seq());
+        assert_eq!(r.cycled(-1).seq(), r.cycled(13).seq());
+        assert_eq!(r.cycled(-1).seq(), r.cycled(r.len() as i64 - 1).seq(),);
     }
 
     #[test]
diff --git a/src/intgeom/snake.rs b/src/intgeom/snake.rs
index dc154a5..cf43e69 100644
--- a/src/intgeom/snake.rs
+++ b/src/intgeom/snake.rs
@@ -1,5 +1,6 @@
 //! Abstract representation of points and polygonal segment chains.
 
+use std::collections::HashSet;
 use std::fmt::{Debug, Display};
 
 use num_complex::Complex;
@@ -82,6 +83,12 @@ pub struct Snake<T: IsRingOrField + IsComplex> {
 
     /// If set to true, the snake will NOT prevent self-intersections.
     allow_intersections: bool,
+
+    /// Set of all visited points for fast vertex-revisit detection.
+    /// Only populated when T::turn() == 4 (i.e. ZZ4), where all edges are
+    /// axis-aligned unit segments and intersection reduces to vertex revisits.
+    /// None for all other ring types (avoiding allocation/insertion overhead).
+    visited: Option<HashSet<T>>,
 }
 
 impl<I: ToPrimitive, T: IsComplex + IsRingOrField + Units> TryFrom<&[I]> for Snake<T> {
@@ -118,12 +125,20 @@ impl<T: IsComplex + IsRingOrField + Units> Snake<T> {
     pub fn new() -> Self {
         let mut grid = UnitSquareGrid::new();
         grid.add((0, 0), 0);
+        let visited = if T::turn() == 4 {
+            let mut s = HashSet::new();
+            s.insert(T::zero());
+            Some(s)
+        } else {
+            None
+        };
         Self {
             points: vec![T::zero(); 1],
             angles: Vec::new(),
             ang_sum: 0,
             grid,
             allow_intersections: false,
+            visited,
         }
     }
 
@@ -245,6 +260,9 @@ impl<T: IsComplex + IsRingOrField + Units> Snake<T> {
             .add(UnitSquareGrid::cell_of(new_pt), self.points.len());
         // add point to representative polyline
         self.points.push(new_pt);
+        if let Some(ref mut visited) = self.visited {
+            visited.insert(new_pt);
+        }
 
         // if the snake is closed, we need to fix up the start angle
         if self.is_closed() {
@@ -309,25 +327,24 @@ impl<T: IsComplex + IsRingOrField + Units> Snake<T> {
     /// on the assumption that all segments have unit length.
     fn can_add(&self, angle: i8) -> bool {
         if self.allow_intersections {
-            return true; // anything goes! WOOHOOO!!!
+            return true;
         }
 
         if self.is_closed() {
-            return false; // already closed -> adding anything makes it non-simple
+            return false;
         }
 
-        // end points of new candidate segment
         let new_seg @ (prev_pt, new_pt) = self.next_seg(angle);
-        // unit square lattice cell neighborhood of new segment
+
+        if let Some(ref visited) = self.visited {
+            return new_pt.is_zero() || !visited.contains(&new_pt);
+        }
+
         let neighborhood = UnitSquareGrid::seg_neighborhood_of(prev_pt, new_pt);
-        // all candidates for segment intersection
         let neighbor_segs = self.cell_segs(&neighborhood);
 
         let new_pt_nz = !new_pt.is_zero();
         for s @ (x, y) in neighbor_segs {
-            // if the new point is NOT the origin (i.e. closes the snake),
-            // explicitly check endpoints for intersection.
-            // (the segment intersection ignores this edge case)
             if new_pt_nz && (new_pt == x || new_pt == y) {
                 return false;
             }
@@ -445,7 +462,7 @@ impl<T: IsComplex> Display for Snake<T> {
 mod tests {
     use super::super::tiles::{hexagon, square, triangle};
     use super::*;
-    use crate::cyclotomic::{Ccw, SymNum, Z12, ZZ12};
+    use crate::cyclotomic::{Ccw, SymNum, Z12, ZZ12, ZZ4};
     use num_rational::Ratio;
     use num_traits::{One, Zero};
 
@@ -670,4 +687,27 @@ mod tests {
         s.add(7);
         assert_eq!(format!("{s}"), "[-2, -1, 0, 1, -5]");
     }
+
+    #[test]
+    fn test_zz4_visited_fast_path() {
+        let mut s: Snake<ZZ4> = Snake::new();
+        assert!(s.visited.is_some());
+        assert!(s.add(0));
+        assert!(s.add(0));
+        assert!(s.add(1));
+        assert!(s.add(1));
+        assert!(!s.add(1));
+    }
+
+    #[test]
+    fn test_zz4_visited_matches_grid() {
+        let mut s_grid: Snake<ZZ12> = Snake::new();
+        let mut s_fast: Snake<ZZ4> = Snake::new();
+        let zz4_angles: &[i8] = &[0, 1, 1, 1];
+        for &a in zz4_angles {
+            assert_eq!(s_grid.add(a * 3), s_fast.add(a), "angle {a}");
+        }
+        assert!(s_fast.is_closed());
+        assert!(s_grid.is_closed());
+    }
 }
diff --git a/src/intgeom/tileset.rs b/src/intgeom/tileset.rs
new file mode 100644
index 0000000..ff4056a
--- /dev/null
+++ b/src/intgeom/tileset.rs
@@ -0,0 +1,999 @@
+use std::collections::{BTreeMap, BTreeSet};
+use std::fmt;
+use std::ops::Range;
+
+use crate::cyclotomic::{IsComplex, IsRingOrField, Units};
+use crate::intgeom::rat::Rat;
+use crate::intgeom::snake::Snake;
+use crate::stringmatch::CyclicMatchIndex;
+
+pub struct GlueOp<T: IsComplex> {
+    pub tile_a: usize,
+    pub tile_b: usize,
+    pub start_a: i64,
+    pub end_b: i64,
+    pub match_len: usize,
+    pub result: Rat<T>,
+}
+
+#[derive(Debug, Clone, Default)]
+pub struct GlueStats {
+    pub cmi_candidates: usize,
+    pub cmi_sequences: usize,
+    pub se_total_pairs: usize,
+    pub se_pass_heuristic: usize,
+    pub se_sequences: usize,
+    pub unique_sequences: usize,
+    pub snake_valid: usize,
+}
+
+impl GlueStats {
+    pub fn total_sequences(&self) -> usize {
+        self.cmi_sequences + self.se_sequences
+    }
+
+    pub fn snake_failures(&self) -> usize {
+        self.unique_sequences - self.snake_valid
+    }
+}
+
+impl fmt::Display for GlueStats {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let cmi_pre = self.cmi_candidates.saturating_sub(self.cmi_sequences);
+        let se_heuristic_rej = self.se_total_pairs.saturating_sub(self.se_pass_heuristic);
+        let se_len_rej = self.se_pass_heuristic.saturating_sub(self.se_sequences);
+        let dedup_savings = self.total_sequences().saturating_sub(self.unique_sequences);
+        write!(
+            f,
+            "cmi: {} seeds → {} pre-filtered, {} seqs | \
+             se: {} pairs → {} heuristic rej, {} len rej, {} seqs | \
+             {} total seqs → {} unique (dedup saved {}) → {} valid ({} failures, {:.1}% accept)",
+            self.cmi_candidates,
+            cmi_pre,
+            self.cmi_sequences,
+            self.se_total_pairs,
+            se_heuristic_rej,
+            se_len_rej,
+            self.se_sequences,
+            self.total_sequences(),
+            self.unique_sequences,
+            dedup_savings,
+            self.snake_valid,
+            self.snake_failures(),
+            if self.unique_sequences > 0 {
+                self.snake_valid as f64 / self.unique_sequences as f64 * 100.0
+            } else {
+                0.0
+            },
+        )
+    }
+}
+
+impl std::ops::AddAssign for GlueStats {
+    fn add_assign(&mut self, other: Self) {
+        self.cmi_candidates += other.cmi_candidates;
+        self.cmi_sequences += other.cmi_sequences;
+        self.se_total_pairs += other.se_total_pairs;
+        self.se_pass_heuristic += other.se_pass_heuristic;
+        self.se_sequences += other.se_sequences;
+        self.unique_sequences += other.unique_sequences;
+        self.snake_valid += other.snake_valid;
+    }
+}
+
+impl<T: IsComplex + IsRingOrField + Units> std::fmt::Debug for GlueOp<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "GlueOp(a={}, b={}, start_a={}, end_b={}, len={}, result={})",
+            self.tile_a, self.tile_b, self.start_a, self.end_b, self.match_len, self.result,
+        )
+    }
+}
+
+pub struct TileSet<T: IsComplex> {
+    rats: Vec<Rat<T>>,
+    cmi: CyclicMatchIndex,
+}
+
+impl<T: IsComplex + IsRingOrField + Units> TileSet<T> {
+    pub fn new(rats: Vec<Rat<T>>) -> Self {
+        assert!(!rats.is_empty(), "need at least one tile");
+
+        let chirality = rats[0].chirality();
+        for (i, r) in rats.iter().enumerate() {
+            assert_eq!(
+                r.chirality(),
+                chirality,
+                "mixed chirality: tile {i} has chirality {} but expected {}",
+                r.chirality(),
+                chirality,
+            );
+        }
+
+        let sequences: Vec<Vec<i8>> = rats.iter().map(|r| r.seq().to_vec()).collect();
+        let cmi = CyclicMatchIndex::new(&sequences);
+
+        TileSet { rats, cmi }
+    }
+
+    pub fn num_tiles(&self) -> usize {
+        self.rats.len()
+    }
+
+    pub fn rat(&self, i: usize) -> &Rat<T> {
+        &self.rats[i]
+    }
+
+    pub fn shared_boundaries(&self, i: usize, j: usize) -> Vec<crate::stringmatch::CyclicMatch> {
+        self.cmi.maximal_rc_matches(i, j)
+    }
+
+    fn sort_by_interval(results: &mut [GlueOp<T>]) {
+        results.sort_by(|a, b| {
+            a.start_a
+                .cmp(&b.start_a)
+                .then_with(|| a.end_b.cmp(&b.end_b))
+        });
+    }
+
+    fn sort_global(results: &mut [GlueOp<T>]) {
+        results.sort_by(|a, b| {
+            a.tile_a
+                .cmp(&b.tile_a)
+                .then_with(|| a.tile_b.cmp(&b.tile_b))
+                .then_with(|| a.start_a.cmp(&b.start_a))
+                .then_with(|| a.end_b.cmp(&b.end_b))
+        });
+    }
+
+    // --- Layer 1: unchecked candidate generation ---
+
+    /// Generate all glue candidates for tiles `i` and `j`, computing glued
+    /// sequences via unchecked Rats and grouping by canonical form.
+    /// No geometric (Snake) validation is performed.
+    fn valid_glues_unchecked(
+        &self,
+        i: usize,
+        j: usize,
+    ) -> (BTreeMap<Rat<T>, Vec<GlueOp<T>>>, GlueStats) {
+        let a = &self.rats[i];
+        let b = &self.rats[j];
+        let n_a = a.len();
+        let n_b = b.len();
+
+        let mut stats = GlueStats::default();
+        let mut groups: BTreeMap<Rat<T>, Vec<GlueOp<T>>> = BTreeMap::new();
+
+        if n_a == 0 || n_b == 0 {
+            return (groups, stats);
+        }
+
+        let cmi_matches = self.cmi.maximal_rc_matches(i, j);
+        stats.cmi_candidates = cmi_matches.len();
+
+        for m in &cmi_matches {
+            let (ns, len, ne) = a.get_match((m.pos_a as i64, m.pos_b as i64), b);
+            if len <= 1 {
+                continue;
+            }
+            if !junction_gap_nonnegative(a.seq(), ns as usize, len, b.seq(), ne as usize) {
+                continue;
+            }
+            stats.cmi_sequences += 1;
+            if let Ok(glued) = a.try_glue_precomputed((ns, len, ne), b, true) {
+                groups.entry(glued.clone()).or_default().push(GlueOp {
+                    tile_a: i,
+                    tile_b: j,
+                    start_a: ns,
+                    end_b: ne,
+                    match_len: len,
+                    result: glued,
+                });
+            }
+        }
+
+        let seq_a = a.seq();
+        let seq_b = b.seq();
+        stats.se_total_pairs = n_a * n_b;
+
+        for ia in 0..n_a {
+            for ib in 0..n_b {
+                if !is_single_edge_candidate(seq_a, ia, seq_b, ib) {
+                    continue;
+                }
+                stats.se_pass_heuristic += 1;
+                let (ns, len, ne) = a.get_match((ia as i64, ib as i64), b);
+                if len != 1 {
+                    continue;
+                }
+                stats.se_sequences += 1;
+                if let Ok(glued) = a.try_glue_precomputed((ns, len, ne), b, true) {
+                    groups.entry(glued.clone()).or_default().push(GlueOp {
+                        tile_a: i,
+                        tile_b: j,
+                        start_a: ns,
+                        end_b: ne,
+                        match_len: len,
+                        result: glued,
+                    });
+                }
+            }
+        }
+
+        (groups, stats)
+    }
+
+    // --- Layer 2: shared geometric validation ---
+
+    /// Validate unique glued sequences via Snake construction.
+    /// For each unique Rat key, runs `Snake::try_from` once.
+    /// Returns all GlueOps whose sequences pass, plus validation counts.
+    fn validate_glue_groups(
+        groups: BTreeMap<Rat<T>, Vec<GlueOp<T>>>,
+        stats: &mut GlueStats,
+    ) -> Vec<GlueOp<T>> {
+        stats.unique_sequences = groups.len();
+        let mut results = Vec::new();
+        for (rat, ops) in groups {
+            if Snake::<T>::try_from(rat.seq()).is_ok() {
+                stats.snake_valid += 1;
+                results.extend(ops);
+            }
+        }
+        results
+    }
+
+    // --- Layer 3: public API ---
+
+    /// Like [`valid_glues_for_pairs_with_stats`] but returns only distinct
+    /// valid Rat shapes, discarding GlueOps. Uses BTreeSet instead of
+    /// BTreeMap<Rat, Vec<GlueOp>>, dramatically reducing memory for large
+    /// numbers of tile pairs where only the distinct patch shapes matter.
+    pub fn valid_rats_for_pairs(&self, pairs: &[(usize, usize)]) -> (BTreeSet<Rat<T>>, GlueStats) {
+        let mut all_keys: BTreeSet<Rat<T>> = BTreeSet::new();
+        let mut stats = GlueStats::default();
+        for &(i, j) in pairs {
+            let (groups, s) = self.valid_glues_unchecked(i, j);
+            stats.cmi_candidates += s.cmi_candidates;
+            stats.cmi_sequences += s.cmi_sequences;
+            stats.se_total_pairs += s.se_total_pairs;
+            stats.se_pass_heuristic += s.se_pass_heuristic;
+            stats.se_sequences += s.se_sequences;
+            for (rat, _) in groups {
+                all_keys.insert(rat);
+            }
+        }
+        stats.unique_sequences = all_keys.len();
+        let mut valid = BTreeSet::new();
+        for rat in all_keys {
+            if Snake::<T>::try_from(rat.seq()).is_ok() {
+                stats.snake_valid += 1;
+                valid.insert(rat);
+            }
+        }
+        (valid, stats)
+    }
+
+    /// Find all valid glue operations for the given pairs of tiles,
+    /// collecting unchecked candidates across all pairs, deduplicating
+    /// by canonical form, then validating unique sequences once.
+    /// Returns both results and detailed candidate funnel statistics.
+    pub fn valid_glues_for_pairs_with_stats(
+        &self,
+        pairs: &[(usize, usize)],
+    ) -> (Vec<GlueOp<T>>, GlueStats) {
+        let mut all_groups: BTreeMap<Rat<T>, Vec<GlueOp<T>>> = BTreeMap::new();
+        let mut stats = GlueStats::default();
+        for &(i, j) in pairs {
+            let (groups, s) = self.valid_glues_unchecked(i, j);
+            stats.cmi_candidates += s.cmi_candidates;
+            stats.cmi_sequences += s.cmi_sequences;
+            stats.se_total_pairs += s.se_total_pairs;
+            stats.se_pass_heuristic += s.se_pass_heuristic;
+            stats.se_sequences += s.se_sequences;
+            for (rat, ops) in groups {
+                all_groups.entry(rat).or_default().extend(ops);
+            }
+        }
+        let mut results = Self::validate_glue_groups(all_groups, &mut stats);
+        Self::sort_global(&mut results);
+        (results, stats)
+    }
+
+    /// Find all valid glue operations between tiles `i` and `j`,
+    /// returning both results and detailed candidate funnel statistics.
+    ///
+    /// See [`valid_glues`] for phase documentation.
+    pub fn valid_glues_with_stats(&self, i: usize, j: usize) -> (Vec<GlueOp<T>>, GlueStats) {
+        let (groups, mut stats) = self.valid_glues_unchecked(i, j);
+        let mut results = Self::validate_glue_groups(groups, &mut stats);
+        Self::sort_by_interval(&mut results);
+        (results, stats)
+    }
+
+    /// Find all valid glue operations between tiles `i` and `j`.
+    ///
+    /// Uses two phases that are disjoint by construction:
+    ///
+    /// **Phase 1 (multi-edge, CMI-seeded):** The Cyclic Match Index finds
+    /// maximal reverse-complement matches. Each CMI match start is seeded,
+    /// and only matches of length ≥ 2 are accepted. The junction gap check
+    /// rejects only overlaps (gap < 0), allowing collinear junctions.
+    ///
+    /// **Phase 2 (single-edge scan):** Enumerates all `(ia, ib)` pairs and
+    /// applies the interior-angle heuristic to reject candidates where the
+    /// junction gap is ≤ 0 (overlap or collinear). Only matches of length
+    /// exactly 1 are accepted — longer matches are found by phase 1.
+    ///
+    /// The two phases produce disjoint result sets by match length, so no
+    /// deduplication is needed.
+    pub fn valid_glues(&self, i: usize, j: usize) -> Vec<GlueOp<T>> {
+        self.valid_glues_with_stats(i, j).0
+    }
+
+    pub fn all_valid_glues_with_stats(&self) -> (Vec<GlueOp<T>>, GlueStats) {
+        let n = self.rats.len();
+        let pairs: Vec<(usize, usize)> = (0..n).flat_map(|i| (0..n).map(move |j| (i, j))).collect();
+        self.valid_glues_for_pairs_with_stats(&pairs)
+    }
+
+    pub fn all_valid_glues(&self) -> Vec<GlueOp<T>> {
+        self.all_valid_glues_with_stats().0
+    }
+
+    pub fn valid_glues_containing(
+        &self,
+        i: usize,
+        range: Range<usize>,
+        j: usize,
+    ) -> Vec<GlueOp<T>> {
+        let n_a = self.rats[i].len();
+        let n_b = self.rats[j].len();
+
+        if n_a == 0 || n_b == 0 || range.is_empty() || range.end > n_a {
+            return vec![];
+        }
+
+        let results: Vec<GlueOp<T>> = self
+            .valid_glues(i, j)
+            .into_iter()
+            .filter(|g| match_covers_range(g.start_a, g.match_len, &range, n_a))
+            .collect();
+        results
+    }
+}
+
+fn match_covers_range(norm_start: i64, match_len: usize, range: &Range<usize>, n: usize) -> bool {
+    if match_len < range.end - range.start {
+        return false;
+    }
+    for v in range.clone() {
+        let d = ((v as i64 - norm_start).rem_euclid(n as i64)) as usize;
+        if d >= match_len {
+            return false;
+        }
+    }
+    true
+}
+
+/// Check whether a single-edge glue at positions `(ia, ib)` is a locally
+/// consistent candidate based on interior angle sums at both junction vertices.
+///
+/// When two tiles share a single edge, two junction vertices are formed
+/// at the endpoints. At each junction, four edges meet — two from each tile.
+/// The tiles' interior angles at the junction sum to:
+///
+/// > (hturn − exterior_a) + (hturn − exterior_b) = 2·hturn − (exterior_a + exterior_b)
+///
+/// * **Overflow** (`exterior_a + exterior_b < 0`): the interior angles sum to
+///   more than a full circle, so the neighbor edges physically overlap → the
+///   glue creates a self-intersection.
+/// * **Collinear** (`exterior_a + exterior_b == 0`): the interior angles sum
+///   to exactly a full circle, so the neighbor edges are collinear → this is a
+///   multi-edge extension already handled by the CMI index.
+/// * **Gap** (`exterior_a + exterior_b > 0`): the interior angles sum to less
+///   than a full circle, so there is a gap between the neighbor edges → the
+///   candidate is locally consistent.
+///
+/// Returns `true` only when **both** junctions have a positive gap,
+/// meaning the candidate is a true single-edge match that needs only
+/// the global self-intersection check (`try_glue`).
+pub(crate) fn is_single_edge_candidate(a: &[i8], ia: usize, b: &[i8], ib: usize) -> bool {
+    let na = a.len();
+    let nb = b.len();
+    let left = a[ia] as i32 + b[ib] as i32;
+    let right = a[(ia + 1) % na] as i32 + b[(ib + nb - 1) % nb] as i32;
+    left > 0 && right > 0
+}
+
+pub(crate) fn junction_gap_nonnegative(
+    a: &[i8],
+    ns: usize,
+    mlen: usize,
+    b: &[i8],
+    ne: usize,
+) -> bool {
+    let na = a.len();
+    let nb = b.len();
+    let left = a[(ns + mlen) % na] as i32 + b[(ne + nb - mlen) % nb] as i32;
+    let right = a[ns] as i32 + b[ne] as i32;
+    left >= 0 && right >= 0
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::cyclotomic::ZZ12;
+    use crate::intgeom::rat::Rat;
+    use crate::intgeom::snake::Snake;
+    use crate::intgeom::tiles::{hexagon, spectre};
+    use std::collections::{BTreeSet, HashSet};
+
+    const MYSTIC_ZZ12: &[i8] = &[
+        0, 2, -3, 2, 3, -2, 3, -2, 3, 2, -3, 2, 0, 2, -3, 2, 3, 2, -3, 2,
+    ];
+
+    #[test]
+    fn spectre_self_finds_mystic() {
+        let s: Snake<ZZ12> = spectre();
+        let r: Rat<ZZ12> = Rat::from_unchecked(&s);
+        let ts = TileSet::new(vec![r.clone()]);
+
+        let glues = ts.valid_glues(0, 0);
+        assert!(
+            glues.iter().any(|g| g.result.seq() == MYSTIC_ZZ12),
+            "mystic not found in {} glues: {:?}",
+            glues.len(),
+            glues,
+        );
+    }
+
+    #[test]
+    fn hexagon_self_single_edge() {
+        let h: Snake<ZZ12> = hexagon();
+        let r: Rat<ZZ12> = Rat::from_unchecked(&h);
+        let ts = TileSet::new(vec![r.clone()]);
+
+        let glues = ts.valid_glues(0, 0);
+        assert!(!glues.is_empty(), "hexagon should have valid self-glues");
+
+        let expected = &[-2, 2, 2, 2, 2, -2, 2, 2, 2, 2];
+        assert!(
+            glues.iter().any(|g| g.result.seq() == expected),
+            "hex+hex glue not found in {} glues",
+            glues.len(),
+        );
+    }
+
+    #[test]
+    fn self_intersecting_filtered() {
+        let r1 = Rat::<ZZ12>::from_slice_unchecked(&[0, 0, 3, 0, 3, 3, -3, -3, 3, 3, 0, 3]);
+        let r2 = Rat::<ZZ12>::from_slice_unchecked(&[0, 0, 3, 3, 0, 0, 3, 3]);
+        let ts = TileSet::new(vec![r1.clone(), r2.clone()]);
+
+        assert!(r1.try_glue((8, 0), &r2).is_err());
+
+        let glues = ts.valid_glues(0, 1);
+        for g in &glues {
+            assert!(
+                !(g.start_a == 8 && g.end_b == 0),
+                "self-intersecting glue should be filtered"
+            );
+        }
+    }
+
+    #[test]
+    fn vertex_touching_rejected() {
+        let r3 = Rat::<ZZ12>::try_from(
+            &Snake::try_from(&[
+                0, 0, 3, 2, 4, -3, -3, 3, 2, 4, -3, -3, 3, 0, 2, 4, -3, -3, 3, 2, 4, -3, 3, -3,
+            ])
+            .unwrap(),
+        )
+        .unwrap();
+        let r4 = Rat::<ZZ12>::from_slice_unchecked(&[0, 0, 3, 3, 0, 0, 3, 3]);
+        let ts = TileSet::new(vec![r3.clone(), r4.clone()]);
+
+        assert!(r3.try_glue((7, 0), &r4).is_err());
+
+        let glues = ts.valid_glues(0, 1);
+        for g in &glues {
+            assert!(
+                !(g.start_a == 7 && g.end_b == 0),
+                "vertex-touching glue should be filtered"
+            );
+        }
+    }
+
+    #[test]
+    #[should_panic(expected = "mixed chirality")]
+    fn mixed_chirality_panics() {
+        let ccw = Rat::<ZZ12>::from_slice_unchecked(&[1, 1, 1, 1]);
+        let cw = ccw.reversed();
+        let _ts = TileSet::new(vec![ccw, cw]);
+    }
+
+    #[test]
+    fn deterministic_order() {
+        let h: Snake<ZZ12> = hexagon();
+        let r: Rat<ZZ12> = Rat::from_unchecked(&h);
+        let ts = TileSet::new(vec![r]);
+        let glues = ts.valid_glues(0, 0);
+        for w in glues.windows(2) {
+            assert!(
+                w[0].start_a <= w[1].start_a
+                    && (w[0].start_a < w[1].start_a || w[0].end_b <= w[1].end_b),
+                "glues not in deterministic order"
+            );
+        }
+    }
+
+    #[test]
+    fn multi_tile_all_valid_glues() {
+        let sq = Rat::<ZZ12>::from_slice_unchecked(&[0, 1, 0, 1, 0, 1, 0, 1]);
+        let tri = Rat::<ZZ12>::from_slice_unchecked(&[4, 4, 4]);
+        let ts = TileSet::new(vec![sq, tri]);
+
+        let all = ts.all_valid_glues();
+        for w in all.windows(2) {
+            assert!(
+                w[0].tile_a < w[1].tile_a
+                    || (w[0].tile_a == w[1].tile_a && w[0].tile_b < w[1].tile_b)
+                    || (w[0].tile_a == w[1].tile_a
+                        && w[0].tile_b == w[1].tile_b
+                        && (w[0].start_a < w[1].start_a
+                            || (w[0].start_a == w[1].start_a && w[0].end_b <= w[1].end_b))),
+                "all_valid_glues not sorted"
+            );
+        }
+    }
+
+    #[test]
+    fn containing_query_spectre() {
+        let s: Snake<ZZ12> = spectre();
+        let r: Rat<ZZ12> = Rat::from_unchecked(&s);
+        let ts = TileSet::new(vec![r]);
+
+        let glues = ts.valid_glues_containing(0, 2..3, 0);
+        assert!(
+            glues.iter().any(|g| g.result.seq() == MYSTIC_ZZ12),
+            "containing query should find mystic, got {} glues: {:?}",
+            glues.len(),
+            glues,
+        );
+    }
+
+    fn brute_force_valid_glues<T: IsComplex + IsRingOrField + Units>(
+        a: &Rat<T>,
+        b: &Rat<T>,
+        i: usize,
+        j: usize,
+    ) -> Vec<GlueOp<T>> {
+        let mut seen: HashSet<(i64, usize, i64)> = HashSet::new();
+        let mut results: Vec<GlueOp<T>> = Vec::new();
+        for ia in 0..a.len() {
+            for ib in 0..b.len() {
+                let match_info = a.get_match((ia as i64, ib as i64), b);
+                let (ns, len, ne) = match_info;
+                if len == 0 || !seen.insert((ns, len, ne)) {
+                    continue;
+                }
+                if let Ok(glued) = a.try_glue_precomputed(match_info, b, false) {
+                    results.push(GlueOp {
+                        tile_a: i,
+                        tile_b: j,
+                        start_a: ns,
+                        end_b: ne,
+                        match_len: len,
+                        result: glued,
+                    });
+                }
+            }
+        }
+        results.sort_by(|x, y| {
+            x.start_a
+                .cmp(&y.start_a)
+                .then_with(|| x.end_b.cmp(&y.end_b))
+        });
+        results
+    }
+
+    fn assert_glue_sets_match<T: IsComplex + IsRingOrField + Units>(
+        ts_glues: &[GlueOp<T>],
+        bf_glues: &[GlueOp<T>],
+        label: &str,
+    ) {
+        assert_eq!(
+            ts_glues.len(),
+            bf_glues.len(),
+            "{label}: count mismatch: TileSet={}, brute_force={}",
+            ts_glues.len(),
+            bf_glues.len(),
+        );
+        for (idx, (ts_g, bf_g)) in ts_glues.iter().zip(bf_glues.iter()).enumerate() {
+            assert_eq!(
+                (ts_g.start_a, ts_g.match_len, ts_g.end_b),
+                (bf_g.start_a, bf_g.match_len, bf_g.end_b),
+                "{label}: interval mismatch at index {idx}",
+            );
+            assert_eq!(
+                ts_g.result, bf_g.result,
+                "{label}: result mismatch at ({}, {})",
+                ts_g.start_a, ts_g.end_b,
+            );
+        }
+    }
+
+    #[test]
+    fn spectre_foldback_cases_accepted() {
+        let s: Snake<ZZ12> = spectre();
+        let r: Rat<ZZ12> = Rat::from_unchecked(&s);
+        let cases: Vec<(usize, usize)> = vec![(0, 4), (0, 8), (6, 4), (6, 8)];
+        for (ia, ib) in cases {
+            assert!(
+                r.try_glue((ia as i64, ib as i64), &r).is_ok(),
+                "spectre foldback ({ia},{ib}) should be accepted by try_glue",
+            );
+        }
+    }
+
+    #[test]
+    fn hexagon_pair_exhaustive() {
+        let h: Snake<ZZ12> = hexagon();
+        let r: Rat<ZZ12> = Rat::from_unchecked(&h);
+        let ts = TileSet::new(vec![r.clone(), r.clone()]);
+
+        let ts_glues = ts.valid_glues(0, 1);
+        let bf_glues = brute_force_valid_glues(&r, &r, 0, 1);
+        assert_glue_sets_match(&ts_glues, &bf_glues, "hexagon pair");
+
+        for g in &ts_glues {
+            assert_eq!(g.match_len, 1, "hexagon pair glue should be single-edge");
+            assert_eq!(g.result.len(), 10, "hex+hex result should be 10-gon");
+        }
+
+        let canonical = &ts_glues[0].result;
+        for (idx, g) in ts_glues.iter().enumerate() {
+            assert_eq!(
+                g.result, *canonical,
+                "hex+hex glue #{idx} should yield same shape",
+            );
+        }
+
+        assert_eq!(
+            ts_glues.len(),
+            36,
+            "hexagon pair should have 36 valid glues"
+        );
+    }
+
+    #[test]
+    fn hexamino_pair_exhaustive() {
+        let h: Snake<ZZ12> = hexagon();
+        let r: Rat<ZZ12> = Rat::from_unchecked(&h);
+        let hexamino = r.glue((0, 0), &r);
+
+        let ts = TileSet::new(vec![hexamino.clone(), hexamino.clone()]);
+        let ts_glues = ts.valid_glues(0, 1);
+        let bf_glues = brute_force_valid_glues(&hexamino, &hexamino, 0, 1);
+        assert_glue_sets_match(&ts_glues, &bf_glues, "hexamino pair");
+
+        assert!(
+            !ts_glues.is_empty(),
+            "hexamino pair should have valid glues"
+        );
+
+        let multi_count = ts_glues.iter().filter(|g| g.match_len > 1).count();
+        let single_count = ts_glues.iter().filter(|g| g.match_len == 1).count();
+        assert!(
+            multi_count > 0,
+            "hexamino pair should have multi-edge matches",
+        );
+        assert!(
+            single_count > 0,
+            "hexamino pair should have single-edge matches",
+        );
+    }
+
+    #[test]
+    fn spectre_pair_exhaustive() {
+        let s: Snake<ZZ12> = spectre();
+        let r: Rat<ZZ12> = Rat::from_unchecked(&s);
+        let ts = TileSet::new(vec![r.clone(), r.clone()]);
+
+        let ts_glues = ts.valid_glues(0, 1);
+        let bf_glues = brute_force_valid_glues(&r, &r, 0, 1);
+        assert_glue_sets_match(&ts_glues, &bf_glues, "spectre pair");
+
+        assert!(
+            ts_glues.iter().any(|g| g.result.seq() == MYSTIC_ZZ12),
+            "spectre pair should find mystic, got {} glues",
+            ts_glues.len(),
+        );
+
+        let multi_count = ts_glues.iter().filter(|g| g.match_len > 1).count();
+        let single_count = ts_glues.iter().filter(|g| g.match_len == 1).count();
+        assert!(
+            multi_count > 0,
+            "spectre pair should have multi-edge matches"
+        );
+        assert!(
+            single_count > 0,
+            "spectre pair should have single-edge matches"
+        );
+    }
+
+    #[test]
+    fn mixed_shapes_exhaustive() {
+        let sq = Rat::<ZZ12>::from_slice_unchecked(&[0, 1, 0, 1, 0, 1, 0, 1]);
+        let tri = Rat::<ZZ12>::from_slice_unchecked(&[4, 4, 4]);
+        let ts = TileSet::new(vec![sq.clone(), tri.clone()]);
+
+        for i in 0..2 {
+            for j in i..2 {
+                let ts_glues = ts.valid_glues(i, j);
+                let bf_glues = brute_force_valid_glues(ts.rat(i), ts.rat(j), i, j);
+                assert_glue_sets_match(&ts_glues, &bf_glues, &format!("mixed shapes ({i},{j})"));
+            }
+        }
+    }
+
+    #[test]
+    fn single_edge_candidate_unit() {
+        let hex: &[i8] = &[2, 2, 2, 2, 2, 2];
+        assert!(
+            is_single_edge_candidate(hex, 0, hex, 0),
+            "hex (0,0): 2+2=4 > 0 on both sides"
+        );
+        assert!(
+            is_single_edge_candidate(hex, 0, hex, 3),
+            "hex (0,3): 2+2=4 > 0 on both sides"
+        );
+
+        let hexamino: &[i8] = &[-2, 2, 2, 2, 2, -2, 2, 2, 2, 2];
+        assert!(
+            !is_single_edge_candidate(hexamino, 0, hexamino, 0),
+            "hexamino (0,0): left=-2+(-2)=-4 < 0, overflow"
+        );
+        assert!(
+            !is_single_edge_candidate(hexamino, 1, hexamino, 1),
+            "hexamino (1,1): right=2+(-2)=0, collinear extension"
+        );
+        assert!(
+            is_single_edge_candidate(hexamino, 1, hexamino, 2),
+            "hexamino (1,2): left=2+2=4, right=2+2=4, both gaps"
+        );
+
+        let spectre: &[i8] = &[3, 2, 0, 2, -3, 2, 3, 2, -3, 2, 3, -2, 3, -2];
+        assert!(
+            !is_single_edge_candidate(spectre, 3, spectre, 4),
+            "spectre (3,4): left=2+(-3)=-1 < 0, overflow"
+        );
+        assert!(
+            is_single_edge_candidate(spectre, 0, spectre, 1),
+            "spectre (0,1): left=3+2=5, right=2+3=5, both gaps"
+        );
+    }
+
+    #[test]
+    fn interior_angle_heuristic_sound() {
+        let hex = Rat::<ZZ12>::from_unchecked(&hexagon());
+        let spec = Rat::<ZZ12>::from_unchecked(&spectre());
+        let hexamino = hex.glue((0, 0), &hex);
+
+        let tiles: Vec<(&str, &Rat<ZZ12>)> =
+            vec![("hex", &hex), ("spec", &spec), ("hexamino", &hexamino)];
+
+        for (label, rat) in &tiles {
+            let n = rat.len();
+            let seq = rat.seq();
+            for ia in 0..n {
+                for ib in 0..n {
+                    let (_, len, _) = rat.get_match((ia as i64, ib as i64), rat);
+                    if len != 1 {
+                        continue;
+                    }
+                    if rat.try_glue((ia as i64, ib as i64), rat).is_ok() {
+                        assert!(
+                            is_single_edge_candidate(seq, ia, seq, ib),
+                            "{label}: try_glue accepted single-edge ({ia},{ib}) but heuristic rejected",
+                        );
+                    }
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn containing_query_exhaustive() {
+        let spec = Rat::<ZZ12>::from_unchecked(&spectre());
+        let hexamino =
+            Rat::<ZZ12>::from_unchecked(&hexagon()).glue((0, 0), &Rat::from_unchecked(&hexagon()));
+
+        for (label, rat) in [("spec", &spec), ("hexamino", &hexamino)] {
+            let ts = TileSet::new(vec![rat.clone()]);
+            let all = ts.valid_glues(0, 0);
+            let n = rat.len();
+
+            for edge in 0..n {
+                let containing = ts.valid_glues_containing(0, edge..edge + 1, 0);
+                let expected_intervals: Vec<_> = all
+                    .iter()
+                    .filter(|g| match_covers_range(g.start_a, g.match_len, &(edge..edge + 1), n))
+                    .map(|g| (g.start_a, g.match_len, g.end_b))
+                    .collect();
+                let got_intervals: Vec<_> = containing
+                    .iter()
+                    .map(|g| (g.start_a, g.match_len, g.end_b))
+                    .collect();
+                assert_eq!(
+                    got_intervals, expected_intervals,
+                    "{label}: edge {edge}: containing query mismatch",
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn empty_tile_valid_glues() {
+        let hex = Rat::<ZZ12>::from_unchecked(&hexagon());
+        let ts = TileSet::new(vec![hex.clone(), hex]);
+
+        // a TileSet with all-valid tiles returns empty for i!=j
+        // when there's only one distinct tile shape
+        assert!(!ts.valid_glues(0, 1).is_empty());
+    }
+
+    #[test]
+    fn containing_query_edge_cases() {
+        let s: Snake<ZZ12> = spectre();
+        let r: Rat<ZZ12> = Rat::from_unchecked(&s);
+        let n = r.len();
+        let ts = TileSet::new(vec![r]);
+
+        // empty range returns nothing
+        assert!(ts.valid_glues_containing(0, 0..0, 0).is_empty());
+
+        // range.end > n returns nothing
+        assert!(ts.valid_glues_containing(0, 0..n + 1, 0).is_empty());
+    }
+
+    #[test]
+    fn shared_boundaries_returns_cmi_matches() {
+        let h: Snake<ZZ12> = hexagon();
+        let r: Rat<ZZ12> = Rat::from_unchecked(&h);
+        let ts = TileSet::new(vec![r.clone(), r.clone()]);
+
+        // hexagon has all-positive angles, RC is all-negative, no matches
+        let boundaries = ts.shared_boundaries(0, 1);
+        assert!(
+            boundaries.is_empty(),
+            "hexagon pair should have no RC matches: {:?}",
+            boundaries,
+        );
+
+        let self_boundaries = ts.shared_boundaries(0, 0);
+        assert!(
+            self_boundaries.is_empty(),
+            "hexagon self should have no RC matches: {:?}",
+            self_boundaries,
+        );
+
+        // spectre has non-trivial RC self-matches
+        let s: Snake<ZZ12> = spectre();
+        let spec: Rat<ZZ12> = Rat::from_unchecked(&s);
+        let ts2 = TileSet::new(vec![spec.clone(), spec]);
+        let cross = ts2.shared_boundaries(0, 1);
+        assert!(!cross.is_empty(), "spectre pair should have RC matches",);
+        let max_len = cross.iter().map(|m| m.len).max().unwrap();
+        assert_eq!(max_len, 3, "spectre max RC cross-match should be 3");
+    }
+
+    #[test]
+    fn heuristic_sound_on_size4_hexagon_patches() {
+        let hex: Rat<ZZ12> = Rat::from_unchecked(&hexagon());
+
+        let ts1 = TileSet::new(vec![hex.clone()]);
+        let size2: Vec<Rat<ZZ12>> = ts1
+            .all_valid_glues()
+            .iter()
+            .map(|g| g.result.clone())
+            .collect::<BTreeSet<_>>()
+            .into_iter()
+            .collect();
+
+        let ts2 = TileSet::new(size2.clone());
+        let size4: Vec<Rat<ZZ12>> = ts2
+            .all_valid_glues()
+            .iter()
+            .map(|g| g.result.clone())
+            .collect::<BTreeSet<_>>()
+            .into_iter()
+            .collect();
+
+        let mut false_rejects = 0;
+
+        for a in &size4 {
+            for b in &size4 {
+                let seq_a = a.seq();
+                let seq_b = b.seq();
+                for ia in 0..a.len() {
+                    for ib in 0..b.len() {
+                        let (_, len, _) = a.get_match((ia as i64, ib as i64), b);
+                        if len != 1 {
+                            continue;
+                        }
+                        if a.try_glue((ia as i64, ib as i64), b).is_err() {
+                            continue;
+                        }
+                        if !is_single_edge_candidate(seq_a, ia, seq_b, ib) {
+                            false_rejects += 1;
+                        }
+                    }
+                }
+            }
+        }
+
+        assert_eq!(
+            false_rejects, 0,
+            "heuristic falsely rejected {false_rejects} valid single-edge glues"
+        );
+    }
+
+    #[test]
+    fn size8_hexagon_valid_glues_matches_brute_force() {
+        let hex: Rat<ZZ12> = Rat::from_unchecked(&hexagon());
+
+        let ts1 = TileSet::new(vec![hex.clone()]);
+        let size2: Vec<Rat<ZZ12>> = ts1
+            .all_valid_glues()
+            .iter()
+            .map(|g| g.result.clone())
+            .collect::<BTreeSet<_>>()
+            .into_iter()
+            .collect();
+
+        let ts2 = TileSet::new(size2.clone());
+        let size4: Vec<Rat<ZZ12>> = ts2
+            .all_valid_glues()
+            .iter()
+            .map(|g| g.result.clone())
+            .collect::<BTreeSet<_>>()
+            .into_iter()
+            .collect();
+
+        let ts4 = TileSet::new(size4.clone());
+
+        let mut index_results: BTreeSet<Rat<ZZ12>> = BTreeSet::new();
+        for i in 0..size4.len() {
+            for j in 0..size4.len() {
+                for g in ts4.valid_glues(i, j) {
+                    index_results.insert(g.result);
+                }
+            }
+        }
+
+        let mut bf_results: BTreeSet<Rat<ZZ12>> = BTreeSet::new();
+        for a in &size4 {
+            for b in &size4 {
+                for ia in 0..a.len() {
+                    for ib in 0..b.len() {
+                        if let Ok(glued) = a.try_glue((ia as i64, ib as i64), b) {
+                            bf_results.insert(glued);
+                        }
+                    }
+                }
+            }
+        }
+
+        eprintln!(
+            "index: {}, brute_force: {}",
+            index_results.len(),
+            bf_results.len(),
+        );
+
+        let missing: Vec<_> = bf_results.difference(&index_results).collect();
+        assert_eq!(missing.len(), 0, "index missed {} patches", missing.len(),);
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 717aa7a..ca8e075 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,6 +5,8 @@ pub mod cyclotomic;
 
 pub mod intgeom;
 
+pub mod stringmatch;
+
 pub mod vis;
 
 pub mod prelude;
diff --git a/src/stringmatch/cyclic.rs b/src/stringmatch/cyclic.rs
new file mode 100644
index 0000000..b3118df
--- /dev/null
+++ b/src/stringmatch/cyclic.rs
@@ -0,0 +1,520 @@
+use crate::stringmatch::MatchIndex;
+
+/// A maximal reverse-complementary match between two cyclic angle sequences.
+///
+/// Represents a shared boundary segment between two polygonal tiles:
+/// tile A's subsequence starting at `pos_a` matches tile B's subsequence
+/// traced in reverse (CW direction) starting at `pos_b`, for `len` edges.
+///
+/// The match is maximal: it cannot be extended in either direction cyclically.
+pub struct CyclicMatch {
+    pub tile_a: usize,
+    pub pos_a: usize,
+    pub tile_b: usize,
+    pub pos_b: usize,
+    pub len: usize,
+}
+
+impl std::fmt::Debug for CyclicMatch {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "CyclicMatch(tile_a={}, pos_a={}, tile_b={}, pos_b={}, len={})",
+            self.tile_a, self.pos_a, self.tile_b, self.pos_b, self.len
+        )
+    }
+}
+
+/// Index for finding all maximal reverse-complementary matches across
+/// a collection of cyclic angle sequences (polygonal tile boundaries).
+///
+/// Each input sequence represents the exterior angles of a closed polygon.
+/// The index finds all maximal-length boundary segments shared between any
+/// pair of tiles, where one tile's segment is the reverse complement of the other's
+/// (representing the same geometric boundary traced in opposite directions).
+///
+/// # Construction
+///
+/// Internally, each sequence is doubled (for cyclic wraparound) and its reverse
+/// complement is also indexed. A single `MatchIndex` is built over all 2n sequences.
+///
+/// # Usage
+///
+/// ```
+/// use tilezz::stringmatch::CyclicMatchIndex;
+///
+/// let tiles: Vec<Vec<i8>> = vec![
+///     vec![3, 2, 0, 2, -3, 2, 3, 2, -3, 2, 3, -2, 3, -2],
+///     vec![1, 2, 3, 4],
+/// ];
+/// let idx = CyclicMatchIndex::new(&tiles);
+/// let matches = idx.maximal_rc_matches(0, 1);
+/// for m in &matches {
+///     println!("tile {} pos {} <-> tile {} pos {}, len {}",
+///              m.tile_a, m.pos_a, m.tile_b, m.pos_b, m.len);
+/// }
+/// ```
+pub struct CyclicMatchIndex {
+    originals: Vec<Vec<i8>>,
+    n: usize,
+    index: MatchIndex,
+}
+
+impl CyclicMatchIndex {
+    /// Build a cyclic match index over the given angle sequences.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `strings` is empty.
+    pub fn new(strings: &[Vec<i8>]) -> Self {
+        assert!(!strings.is_empty(), "need at least one string");
+
+        let n = strings.len();
+
+        let doubled: Vec<Vec<i8>> = strings
+            .iter()
+            .map(|s| s.iter().copied().chain(s.iter().copied()).collect())
+            .collect();
+
+        let doubled_rc: Vec<Vec<i8>> = strings
+            .iter()
+            .map(|s| {
+                let rc: Vec<i8> = s.iter().rev().map(|&a| -a).collect();
+                rc.iter().copied().chain(rc.iter().copied()).collect()
+            })
+            .collect();
+
+        let mut all: Vec<Vec<i8>> = Vec::with_capacity(2 * n);
+        all.extend(doubled);
+        all.extend(doubled_rc);
+
+        CyclicMatchIndex {
+            originals: strings.to_vec(),
+            n,
+            index: MatchIndex::new(&all),
+        }
+    }
+
+    /// Number of tiles in the index.
+    pub fn num_tiles(&self) -> usize {
+        self.n
+    }
+
+    /// Length (number of angles) of tile `i`.
+    pub fn tile_len(&self, i: usize) -> usize {
+        self.originals[i].len()
+    }
+
+    /// Find all maximal reverse-complementary matches between tiles `i` and `j`.
+    ///
+    /// Each `CyclicMatch` describes a shared boundary segment where tile A's
+    /// angles at positions `pos_a..pos_a+len` (cyclically) match the reverse
+    /// complement of tile B's angles at positions `pos_b-len+1..=pos_b` (cyclically).
+    pub fn maximal_rc_matches(&self, i: usize, j: usize) -> Vec<CyclicMatch> {
+        let raw = self.index.all_positive_matches(i, self.n + j);
+
+        let n_a = self.originals[i].len();
+        let n_b = self.originals[j].len();
+        let max_len = n_a.min(n_b);
+
+        if max_len == 0 {
+            return vec![];
+        }
+
+        let a = &self.originals[i];
+        let b = &self.originals[j];
+
+        let mut result: Vec<CyclicMatch> = raw
+            .iter()
+            .filter_map(|m| {
+                let len = m.a.len.min(max_len);
+                if len == 0 {
+                    return None;
+                }
+                let pos_a = m.a.start % n_a;
+                let pos_b = n_b - 1 - m.b.start % n_b;
+
+                if !is_cyclic_left_maximal(a, b, pos_a, pos_b, len) {
+                    return None;
+                }
+
+                Some(CyclicMatch {
+                    tile_a: i,
+                    pos_a,
+                    tile_b: j,
+                    pos_b,
+                    len,
+                })
+            })
+            .collect();
+
+        dedup_cyclic(&mut result);
+        result
+    }
+}
+
+fn is_cyclic_left_maximal(a: &[i8], b: &[i8], pos_a: usize, pos_b: usize, len: usize) -> bool {
+    if len >= a.len() || len >= b.len() {
+        return true;
+    }
+    let prev_a = a[(pos_a + a.len() - 1) % a.len()];
+    let prev_b = -b[(pos_b + 1) % b.len()];
+    prev_a != prev_b
+}
+
+fn dedup_cyclic(matches: &mut Vec<CyclicMatch>) {
+    matches.sort_by(|a, b| {
+        a.pos_a
+            .cmp(&b.pos_a)
+            .then_with(|| a.pos_b.cmp(&b.pos_b))
+            .then_with(|| b.len.cmp(&a.len))
+    });
+    matches.dedup_by(|a, b| a.pos_a == b.pos_a && a.pos_b == b.pos_b);
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn naive_cyclic_rc_matches(a: &[i8], b: &[i8]) -> Vec<(usize, usize, usize)> {
+        let n_a = a.len();
+        let n_b = b.len();
+        if n_a == 0 || n_b == 0 {
+            return vec![];
+        }
+        let max_len = n_a.min(n_b);
+        let mut result = Vec::new();
+
+        for ia in 0..n_a {
+            for ib_rc in 0..n_b {
+                let pos_b = n_b - 1 - ib_rc;
+                let mut l = 0;
+                while l < max_len {
+                    let pa = (ia + l) % n_a;
+                    let prcb = (ib_rc + l) % n_b;
+                    let rc_val = -b[n_b - 1 - prcb];
+                    if a[pa] != rc_val {
+                        break;
+                    }
+                    l += 1;
+                }
+                if l == 0 {
+                    continue;
+                }
+                if !is_cyclic_left_maximal(a, b, ia, pos_b, l) {
+                    continue;
+                }
+                result.push((ia, pos_b, l));
+            }
+        }
+
+        result.sort();
+        result.dedup();
+        result
+    }
+
+    fn check_rc(strings: &[Vec<i8>], i: usize, j: usize) {
+        let idx = CyclicMatchIndex::new(strings);
+        let matches = idx.maximal_rc_matches(i, j);
+        let expected = naive_cyclic_rc_matches(&strings[i], &strings[j]);
+
+        let mut got: Vec<(usize, usize, usize)> =
+            matches.iter().map(|m| (m.pos_a, m.pos_b, m.len)).collect();
+        got.sort();
+        got.dedup();
+
+        assert_eq!(
+            got, expected,
+            "RC mismatch: tile {i} vs tile {j}\n  got={got:?}\n  exp={expected:?}"
+        );
+    }
+
+    #[test]
+    fn identical_tiles_full_match() {
+        let s = vec![1i8, 2, 3];
+        check_rc(&[s.clone(), s.clone()], 0, 1);
+    }
+
+    #[test]
+    fn revcomp_tile_full_match() {
+        let a = vec![1i8, 2, 3];
+        let b: Vec<i8> = a.iter().rev().map(|&x| -x).collect();
+        check_rc(&[a, b], 0, 1);
+    }
+
+    #[test]
+    fn no_match() {
+        let a = vec![1i8, 2, 3];
+        let b = vec![4i8, 5, 6];
+        check_rc(&[a, b], 0, 1);
+    }
+
+    #[test]
+    fn partial_rc_match() {
+        let a = vec![1i8, 2, 3, 4];
+        let b = vec![5i8, 6, -3, -4];
+        check_rc(&[a, b], 0, 1);
+    }
+
+    #[test]
+    fn cyclic_wraparound() {
+        let a = vec![1i8, 2, 3];
+        let b = vec![-1i8, 5, -3];
+        check_rc(&[a, b], 0, 1);
+    }
+
+    #[test]
+    fn self_match() {
+        let a = vec![1i8, 2, 1];
+        check_rc(&[a], 0, 0);
+    }
+
+    #[test]
+    fn square_tiles() {
+        let sq = vec![2i8, 2, 2, -6];
+        check_rc(&[sq.clone(), sq.clone()], 0, 1);
+    }
+
+    #[test]
+    fn spectre_like_angles() {
+        let a: Vec<i8> = vec![3, 2, 0, 2, -3, 2, 3, 2, -3, 2, 3, -2, 3, -2];
+        check_rc(&[a.clone(), a.clone()], 0, 1);
+    }
+
+    #[test]
+    fn empty_tile() {
+        check_rc(&[vec![], vec![1i8, 2]], 0, 1);
+        check_rc(&[vec![1i8, 2], vec![]], 0, 1);
+    }
+
+    #[test]
+    fn single_angle_tiles() {
+        check_rc(&[vec![2i8], vec![-2i8]], 0, 1);
+        check_rc(&[vec![2i8], vec![2i8]], 0, 1);
+    }
+
+    #[test]
+    fn three_tiles_pairwise() {
+        let tiles: Vec<Vec<i8>> = vec![vec![1i8, 2, 3], vec![2i8, 3, 4], vec![-3i8, -2, 5]];
+        for i in 0..tiles.len() {
+            for j in 0..tiles.len() {
+                check_rc(&tiles, i, j);
+            }
+        }
+    }
+
+    #[test]
+    fn repeated_angles() {
+        let a = vec![1i8, 1, 1];
+        check_rc(&[a.clone(), a.clone()], 0, 1);
+    }
+
+    #[test]
+    fn negative_angles() {
+        let a = vec![-1i8, -2, -3];
+        let b = vec![3i8, 2, 1];
+        check_rc(&[a, b], 0, 1);
+    }
+
+    #[test]
+    fn vs_naive_random() {
+        let mut seed: u64 = 54321;
+        for _ in 0..40 {
+            let tiles: Vec<Vec<i8>> = (0..4)
+                .map(|_| {
+                    (0..6)
+                        .map(|_| {
+                            seed = seed
+                                .wrapping_mul(6364136223846793005)
+                                .wrapping_add(1442695040888963407);
+                            ((seed % 7) as i8) - 3
+                        })
+                        .collect()
+                })
+                .collect();
+            for i in 0..tiles.len() {
+                for j in 0..tiles.len() {
+                    check_rc(&tiles, i, j);
+                }
+            }
+        }
+    }
+
+    fn verify_rc_content(a: &[i8], b: &[i8], pos_a: usize, pos_b: usize, len: usize) {
+        let n_a = a.len();
+        let n_b = b.len();
+        assert!(len > 0, "match length must be positive");
+        assert!(len <= n_a && len <= n_b, "match length exceeds tile size");
+        for i in 0..len {
+            let pa = (pos_a + i) % n_a;
+            let pb = (n_b + pos_b - i) % n_b;
+            assert_eq!(
+                a[pa], -b[pb],
+                "RC content mismatch at offset {i}: a[{pa}]={} vs -b[{pb}]={}",
+                a[pa], -b[pb],
+            );
+        }
+    }
+
+    fn verify_all_matches(idx: &CyclicMatchIndex, i: usize, j: usize) {
+        let matches = idx.maximal_rc_matches(i, j);
+        let a = &idx.originals[i];
+        let b = &idx.originals[j];
+        for m in &matches {
+            assert_eq!(m.tile_a, i);
+            assert_eq!(m.tile_b, j);
+            assert!(m.pos_a < a.len());
+            assert!(m.pos_b < b.len());
+            assert!(m.len > 0);
+            verify_rc_content(a, b, m.pos_a, m.pos_b, m.len);
+        }
+    }
+
+    #[test]
+    fn e2e_spectre_self_match() {
+        let angles: Vec<i8> = vec![3, 2, 0, 2, -3, 2, 3, 2, -3, 2, 3, -2, 3, -2];
+        let tiles = vec![angles.clone(), angles.clone()];
+        let idx = CyclicMatchIndex::new(&tiles);
+
+        check_rc(&tiles, 0, 1);
+        verify_all_matches(&idx, 0, 1);
+
+        let matches = idx.maximal_rc_matches(0, 1);
+        assert!(!matches.is_empty(), "spectre should have self-matches");
+
+        let max_len = matches.iter().map(|m| m.len).max().unwrap();
+        assert_eq!(max_len, 3, "spectre max RC self-match should be 3");
+    }
+
+    #[test]
+    fn e2e_spectre_pairwise() {
+        let a: Vec<i8> = vec![3, 2, 0, 2, -3, 2, 3, 2, -3, 2, 3, -2, 3, -2];
+        let b: Vec<i8> = vec![0, 2, -3, 2, 3, 2, -3, 2, 3, -2, 3, -2, 3, 2];
+        let tiles = vec![a, b];
+        let idx = CyclicMatchIndex::new(&tiles);
+
+        for i in 0..2 {
+            for j in 0..2 {
+                check_rc(&tiles, i, j);
+                verify_all_matches(&idx, i, j);
+            }
+        }
+    }
+
+    #[test]
+    fn e2e_tetrominos() {
+        let t_o: Vec<i8> = vec![0, 1, 0, 1, 0, 1, 0, 1];
+        let t_i: Vec<i8> = vec![0, 0, 0, 1, 1, 0, 0, 0, 1, 1];
+        let t_t: Vec<i8> = vec![-1, 1, 1, -1, 1, 1, 0, 0, 1, 1];
+        let tiles = vec![t_o, t_i, t_t];
+
+        let idx = CyclicMatchIndex::new(&tiles);
+        for i in 0..tiles.len() {
+            for j in 0..tiles.len() {
+                check_rc(&tiles, i, j);
+                verify_all_matches(&idx, i, j);
+            }
+        }
+    }
+
+    #[test]
+    fn e2e_tetromino_self_matches() {
+        let t_s: Vec<i8> = vec![-1, 1, 1, 0, 1, -1, 1, 1, 0, 1];
+        let tiles = vec![t_s];
+        let idx = CyclicMatchIndex::new(&tiles);
+
+        let matches = idx.maximal_rc_matches(0, 0);
+        verify_all_matches(&idx, 0, 0);
+        assert!(!matches.is_empty(), "S-tetromino should have self-matches");
+    }
+
+    #[test]
+    fn e2e_hexagons_no_rc_match() {
+        let hex: Vec<i8> = vec![1, 1, 1, 1, 1, 1];
+        let tiles = vec![hex.clone(), hex.clone()];
+        let idx = CyclicMatchIndex::new(&tiles);
+
+        let matches = idx.maximal_rc_matches(0, 1);
+        assert!(
+            matches.is_empty(),
+            "identical hexagons have no RC matches (all positive vs all negative)"
+        );
+    }
+
+    #[test]
+    fn e2e_hexagon_vs_reversed_hexagon() {
+        let hex: Vec<i8> = vec![1, 1, 1, 1, 1, 1];
+        let hex_rev: Vec<i8> = vec![-1, -1, -1, -1, -1, -1];
+        let tiles = vec![hex, hex_rev];
+        let idx = CyclicMatchIndex::new(&tiles);
+
+        let matches = idx.maximal_rc_matches(0, 1);
+        verify_all_matches(&idx, 0, 1);
+
+        let max_len = matches.iter().map(|m| m.len).max().unwrap_or(0);
+        assert_eq!(
+            max_len, 6,
+            "hexagon and reversed hexagon should fully match"
+        );
+    }
+
+    #[test]
+    fn e2e_penrose_rhombs() {
+        let wide: Vec<i8> = vec![
+            2, 0, -1, 2, -1, 0, 0, 3, 0, 0, 1, -2, 1, 0, 0, 2, 0, 0, -1, 2, -1, 0, 0, 3, 0, 1, -2,
+            1, 0,
+        ];
+        let narrow: Vec<i8> = vec![
+            1, 0, -1, 2, -1, 0, 0, 4, 0, 0, -1, 2, -1, 0, 0, 1, 0, 1, -2, 1, 0, 0, 4, 0, 0, 1, -2,
+            1, 0,
+        ];
+        let tiles = vec![wide, narrow];
+        let idx = CyclicMatchIndex::new(&tiles);
+
+        for i in 0..2 {
+            for j in 0..2 {
+                check_rc(&tiles, i, j);
+                verify_all_matches(&idx, i, j);
+            }
+        }
+    }
+
+    #[test]
+    fn e2e_mixed_shapes() {
+        let square: Vec<i8> = vec![1, 1, 1, 1];
+        let triangle: Vec<i8> = vec![2, 2, 2];
+        let rect: Vec<i8> = vec![0, 0, 1, 1, 0, 0, 1, 1];
+        let tiles = vec![square, triangle, rect];
+
+        let idx = CyclicMatchIndex::new(&tiles);
+        for i in 0..tiles.len() {
+            for j in 0..tiles.len() {
+                check_rc(&tiles, i, j);
+                verify_all_matches(&idx, i, j);
+            }
+        }
+    }
+
+    #[test]
+    fn e2e_many_tiles_exhaustive() {
+        let mut seed: u64 = 99999;
+        let mut next_angle = || {
+            seed = seed
+                .wrapping_mul(6364136223846793005)
+                .wrapping_add(1442695040888963407);
+            ((seed % 7) as i8) - 3
+        };
+
+        let tiles: Vec<Vec<i8>> = (0..8)
+            .map(|_| (0..10).map(|_| next_angle()).collect())
+            .collect();
+
+        let idx = CyclicMatchIndex::new(&tiles);
+        for i in 0..tiles.len() {
+            for j in 0..tiles.len() {
+                check_rc(&tiles, i, j);
+                verify_all_matches(&idx, i, j);
+            }
+        }
+    }
+}
diff --git a/src/stringmatch/lcp.rs b/src/stringmatch/lcp.rs
new file mode 100644
index 0000000..559429d
--- /dev/null
+++ b/src/stringmatch/lcp.rs
@@ -0,0 +1,160 @@
+pub fn build_lcp_array(text: &[u32], sa: &[usize]) -> Vec<usize> {
+    let n = text.len();
+    assert_eq!(sa.len(), n, "SA length must equal text length");
+
+    if n == 0 {
+        return vec![];
+    }
+
+    let rank = build_rank(sa);
+    let mut lcp = vec![0usize; n];
+    let mut k: usize = 0;
+
+    for i in 0..n {
+        if rank[i] == 0 {
+            k = 0;
+            continue;
+        }
+        let j = sa[rank[i] - 1];
+        while i + k < n && j + k < n && text[i + k] == text[j + k] {
+            k += 1;
+        }
+        lcp[rank[i]] = k;
+        k = k.saturating_sub(1);
+    }
+
+    lcp
+}
+
+fn build_rank(sa: &[usize]) -> Vec<usize> {
+    let n = sa.len();
+    let mut rank = vec![0usize; n];
+    for (i, &pos) in sa.iter().enumerate() {
+        rank[pos] = i;
+    }
+    rank
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::stringmatch::sais::build_suffix_array;
+
+    fn naive_lcp(text: &[u32], sa: &[usize]) -> Vec<usize> {
+        let n = text.len();
+        let mut lcp = vec![0usize; n];
+        for i in 1..n {
+            let a = sa[i - 1];
+            let b = sa[i];
+            let mut k = 0;
+            while a + k < n && b + k < n && text[a + k] == text[b + k] {
+                k += 1;
+            }
+            lcp[i] = k;
+        }
+        lcp
+    }
+
+    fn build_and_check(text: &[u32]) -> Vec<usize> {
+        let sa = build_suffix_array(text);
+        let lcp = build_lcp_array(text, &sa);
+        let expected = naive_lcp(text, &sa);
+        assert_eq!(lcp, expected, "text={text:?}");
+        lcp
+    }
+
+    #[test]
+    fn single_sentinel() {
+        let text = vec![0u32];
+        let sa = build_suffix_array(&text);
+        let lcp = build_lcp_array(&text, &sa);
+        assert_eq!(lcp, vec![0]);
+    }
+
+    #[test]
+    fn two_chars() {
+        build_and_check(&[1, 0]);
+    }
+
+    #[test]
+    fn banana() {
+        let text: Vec<u32> = vec![2, 1, 3, 1, 3, 1, 0];
+        let sa = build_suffix_array(&text);
+        let lcp = build_lcp_array(&text, &sa);
+        assert_eq!(sa, vec![6, 5, 3, 1, 0, 4, 2]);
+        assert_eq!(lcp, vec![0, 0, 1, 3, 0, 0, 2]);
+    }
+
+    #[test]
+    fn all_same() {
+        build_and_check(&[1, 1, 1, 0]);
+    }
+
+    #[test]
+    fn already_sorted() {
+        build_and_check(&[1, 2, 3, 0]);
+    }
+
+    #[test]
+    fn reverse_sorted() {
+        build_and_check(&[3, 2, 1, 0]);
+    }
+
+    #[test]
+    fn ababab() {
+        build_and_check(&[1, 2, 1, 2, 1, 2, 0]);
+    }
+
+    #[test]
+    fn two_equal_blocks() {
+        build_and_check(&[1, 2, 3, 1, 2, 3, 0]);
+    }
+
+    #[test]
+    fn long_repeated_pattern() {
+        let mut text = vec![];
+        for _ in 0..20 {
+            text.push(1u32);
+            text.push(2u32);
+        }
+        text.push(0);
+        build_and_check(&text);
+    }
+
+    #[test]
+    fn vs_naive_random() {
+        let mut s: u64 = 77777;
+        for _ in 0..50 {
+            s = s
+                .wrapping_mul(6364136223846793005)
+                .wrapping_add(1442695040888963407);
+            let n = (s as usize % 80) + 5;
+            let text: Vec<u32> = (0..n)
+                .map(|i| {
+                    if i == n - 1 {
+                        0
+                    } else {
+                        s = s
+                            .wrapping_mul(6364136223846793005)
+                            .wrapping_add(1442695040888963407);
+                        (s % 10) as u32 + 1
+                    }
+                })
+                .collect();
+            build_and_check(&text);
+        }
+    }
+
+    #[test]
+    fn rank_is_inverse_of_sa() {
+        let text: Vec<u32> = vec![2, 1, 3, 1, 3, 1, 0];
+        let sa = build_suffix_array(&text);
+        let rank = build_rank(&sa);
+        for (i, &pos) in sa.iter().enumerate() {
+            assert_eq!(rank[pos], i, "sa[{i}]={pos} but rank[{pos}]={}", rank[pos]);
+        }
+        for i in 0..text.len() {
+            assert_eq!(sa[rank[i]], i, "sa[rank[{i}]] != {i}");
+        }
+    }
+}
diff --git a/src/stringmatch/maximal.rs b/src/stringmatch/maximal.rs
new file mode 100644
index 0000000..c819828
--- /dev/null
+++ b/src/stringmatch/maximal.rs
@@ -0,0 +1,253 @@
+use crate::stringmatch::{Match, MatchIndex};
+
+pub fn find_maximal_matches(index: &MatchIndex, i: usize, j: usize) -> Vec<Match> {
+    find_matches_with_filter(index, i, j, |index, pos_a, local_a, pos_b, local_b| {
+        is_left_maximal(index, pos_a, local_a, pos_b, local_b)
+    })
+}
+
+pub fn find_all_positive_matches(index: &MatchIndex, i: usize, j: usize) -> Vec<Match> {
+    find_matches_with_filter(index, i, j, |_, _, _, _, _| true)
+}
+
+fn find_matches_with_filter(
+    index: &MatchIndex,
+    i: usize,
+    j: usize,
+    accept: impl Fn(&MatchIndex, usize, usize, usize, usize) -> bool,
+) -> Vec<Match> {
+    let mut matches = Vec::new();
+
+    let start_i = index.boundaries[i];
+    let len_i = index.string_lens[i];
+
+    for local_a in 0..len_i {
+        let pos_a = start_i + local_a;
+        let r = index.rank[pos_a];
+
+        scan_side(
+            index,
+            i,
+            j,
+            pos_a,
+            local_a,
+            (0..r).rev(),
+            |k| index.lcp[k + 1],
+            &accept,
+            &mut matches,
+        );
+
+        scan_side(
+            index,
+            i,
+            j,
+            pos_a,
+            local_a,
+            (r + 1)..index.concat.len(),
+            |k| index.lcp[k],
+            &accept,
+            &mut matches,
+        );
+    }
+
+    matches.sort();
+    matches.dedup();
+    matches
+}
+
+#[allow(clippy::too_many_arguments)]
+fn scan_side(
+    index: &MatchIndex,
+    string_i: usize,
+    string_j: usize,
+    pos_a: usize,
+    local_a: usize,
+    range: impl Iterator<Item = usize>,
+    lcp_at: impl Fn(usize) -> usize,
+    accept: &dyn Fn(&MatchIndex, usize, usize, usize, usize) -> bool,
+    matches: &mut Vec<Match>,
+) {
+    let mut running_min = usize::MAX;
+
+    for k in range {
+        running_min = running_min.min(lcp_at(k));
+        if running_min == 0 {
+            break;
+        }
+        if index.doc[index.sa[k]] != string_j {
+            continue;
+        }
+        let pos_b = index.sa[k];
+        let local_b = index.doc_pos[pos_b];
+        if string_i == string_j && local_a == local_b {
+            continue;
+        }
+        if accept(index, pos_a, local_a, pos_b, local_b) {
+            matches.push(Match::new(
+                string_i,
+                string_j,
+                local_a,
+                local_b,
+                running_min,
+            ));
+        }
+    }
+}
+
+fn is_left_maximal(
+    index: &MatchIndex,
+    pos_a: usize,
+    local_a: usize,
+    pos_b: usize,
+    local_b: usize,
+) -> bool {
+    if local_a == 0 || local_b == 0 {
+        return true;
+    }
+    index.concat[pos_a - 1] != index.concat[pos_b - 1]
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::stringmatch::MatchIndex;
+
+    fn naive_maximal_matches(a: &[i8], b: &[i8]) -> Vec<(usize, usize, usize)> {
+        let mut result = Vec::new();
+        for ia in 0..a.len() {
+            for ib in 0..b.len() {
+                let mut l = 0;
+                while ia + l < a.len() && ib + l < b.len() && a[ia + l] == b[ib + l] {
+                    l += 1;
+                }
+                if l == 0 {
+                    continue;
+                }
+                let left_ok = ia == 0 || ib == 0 || a[ia - 1] != b[ib - 1];
+                if left_ok {
+                    result.push((ia, ib, l));
+                }
+            }
+        }
+        result.sort();
+        result.dedup();
+        result
+    }
+
+    fn normalize_matches(tuples: &mut Vec<(usize, usize, usize)>, same_string: bool) {
+        if same_string {
+            tuples.retain(|&(a, b, _)| a != b);
+            for t in tuples.iter_mut() {
+                let (a, b, l) = *t;
+                *t = (a.min(b), a.max(b), l);
+            }
+        }
+        tuples.sort();
+        tuples.dedup();
+    }
+
+    fn check_matches(strings: &[Vec<i8>], i: usize, j: usize) {
+        let idx = MatchIndex::new(strings);
+        let matches = idx.maximal_matches(i, j);
+
+        let mut got: Vec<(usize, usize, usize)> = matches
+            .iter()
+            .map(|m| (m.a.start, m.b.start, m.a.len))
+            .collect();
+        let mut expected = naive_maximal_matches(&strings[i], &strings[j]);
+        let same = i == j;
+        normalize_matches(&mut got, same);
+        normalize_matches(&mut expected, same);
+
+        assert_eq!(got, expected, "mismatch for strings[{i}] vs strings[{j}]");
+    }
+
+    #[test]
+    fn no_matches() {
+        check_matches(&[vec![1i8, 2], vec![3i8, 4]], 0, 1);
+    }
+
+    #[test]
+    fn full_match() {
+        check_matches(&[vec![1i8, 2, 3], vec![1i8, 2, 3]], 0, 1);
+    }
+
+    #[test]
+    fn partial_overlap() {
+        check_matches(&[vec![1i8, 2, 3, 4], vec![3i8, 4, 5]], 0, 1);
+    }
+
+    #[test]
+    fn partial_overlap_reversed() {
+        check_matches(&[vec![3i8, 4, 5], vec![1i8, 2, 3, 4]], 0, 1);
+    }
+
+    #[test]
+    fn multiple_overlaps() {
+        check_matches(&[vec![1i8, 2, 1, 2], vec![1i8, 2, 3, 1]], 0, 1);
+    }
+
+    #[test]
+    fn same_string_self_matches() {
+        check_matches(&[vec![1i8, 2, 1, 2]], 0, 0);
+    }
+
+    #[test]
+    fn single_char_strings() {
+        check_matches(&[vec![5i8], vec![5i8]], 0, 1);
+        check_matches(&[vec![5i8], vec![6i8]], 0, 1);
+    }
+
+    #[test]
+    fn empty_string_involved() {
+        check_matches(&[vec![], vec![1i8, 2]], 0, 1);
+        check_matches(&[vec![1i8, 2], vec![]], 0, 1);
+    }
+
+    #[test]
+    fn all_same_string() {
+        check_matches(&[vec![1i8, 1, 1]], 0, 0);
+    }
+
+    #[test]
+    fn three_strings_pairwise() {
+        let strings: Vec<Vec<i8>> = vec![vec![1i8, 2, 3], vec![2i8, 3, 4], vec![5i8, 1, 2]];
+        check_matches(&strings, 0, 1);
+        check_matches(&strings, 0, 2);
+        check_matches(&strings, 1, 2);
+    }
+
+    #[test]
+    fn long_repeated_pattern() {
+        let s: Vec<i8> = [1i8, 2].iter().cycle().take(20).copied().collect();
+        check_matches(&[s.clone(), s.clone()], 0, 1);
+    }
+
+    #[test]
+    fn vs_naive_random() {
+        let mut seed: u64 = 98765;
+        for _ in 0..30 {
+            let strings = (0..3)
+                .map(|_| {
+                    (0..8)
+                        .map(|_| {
+                            seed = seed
+                                .wrapping_mul(6364136223846793005)
+                                .wrapping_add(1442695040888963407);
+                            ((seed % 5) as i8) - 2
+                        })
+                        .collect()
+                })
+                .collect::<Vec<Vec<i8>>>();
+            for i in 0..strings.len() {
+                for j in i..strings.len() {
+                    check_matches(&strings, i, j);
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn negative_values() {
+        check_matches(&[vec![-1i8, -2, -3], vec![-2i8, -3, -4]], 0, 1);
+    }
+}
diff --git a/src/stringmatch/mod.rs b/src/stringmatch/mod.rs
new file mode 100644
index 0000000..f8df680
--- /dev/null
+++ b/src/stringmatch/mod.rs
@@ -0,0 +1,299 @@
+mod cyclic;
+mod lcp;
+mod maximal;
+mod rmq;
+mod sais;
+
+pub use cyclic::{CyclicMatch, CyclicMatchIndex};
+
+use std::collections::HashMap;
+
+pub struct Interval {
+    pub string: usize,
+    pub start: usize,
+    pub len: usize,
+}
+
+pub struct Match {
+    pub a: Interval,
+    pub b: Interval,
+}
+
+impl Match {
+    fn new(string_i: usize, string_j: usize, start_i: usize, start_j: usize, len: usize) -> Self {
+        if string_i == string_j && start_i > start_j {
+            Match {
+                a: Interval {
+                    string: string_j,
+                    start: start_j,
+                    len,
+                },
+                b: Interval {
+                    string: string_i,
+                    start: start_i,
+                    len,
+                },
+            }
+        } else {
+            Match {
+                a: Interval {
+                    string: string_i,
+                    start: start_i,
+                    len,
+                },
+                b: Interval {
+                    string: string_j,
+                    start: start_j,
+                    len,
+                },
+            }
+        }
+    }
+}
+
+impl PartialEq for Match {
+    fn eq(&self, other: &Self) -> bool {
+        self.a.string == other.a.string
+            && self.a.start == other.a.start
+            && self.a.len == other.a.len
+            && self.b.string == other.b.string
+            && self.b.start == other.b.start
+            && self.b.len == other.b.len
+    }
+}
+
+impl Eq for Match {}
+
+impl PartialOrd for Match {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for Match {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.a
+            .string
+            .cmp(&other.a.string)
+            .then_with(|| self.a.start.cmp(&other.a.start))
+            .then_with(|| self.b.string.cmp(&other.b.string))
+            .then_with(|| self.b.start.cmp(&other.b.start))
+            .then_with(|| self.a.len.cmp(&other.a.len))
+    }
+}
+
+pub struct MatchIndex {
+    concat: Vec<u32>,
+    sa: Vec<usize>,
+    lcp: Vec<usize>,
+    rank: Vec<usize>,
+    doc: Vec<usize>,
+    doc_pos: Vec<usize>,
+    boundaries: Vec<usize>,
+    string_lens: Vec<usize>,
+    rmq: rmq::SparseTable,
+    num_strings: usize,
+}
+
+impl MatchIndex {
+    pub fn new(strings: &[Vec<i8>]) -> Self {
+        assert!(!strings.is_empty(), "need at least one string");
+
+        let num_strings = strings.len();
+        let (concat, boundaries, string_lens) = Self::concatenate(strings);
+
+        let sa = sais::build_suffix_array(&concat);
+        let lcp = lcp::build_lcp_array(&concat, &sa);
+        let rank = Self::build_rank(&sa);
+        let (doc, doc_pos) = Self::build_doc_arrays(concat.len(), &boundaries, &string_lens);
+        let rmq = rmq::SparseTable::new(&lcp);
+
+        MatchIndex {
+            concat,
+            sa,
+            lcp,
+            rank,
+            doc,
+            doc_pos,
+            boundaries,
+            string_lens,
+            rmq,
+            num_strings,
+        }
+    }
+
+    pub fn num_strings(&self) -> usize {
+        self.num_strings
+    }
+
+    pub fn string_len(&self, i: usize) -> usize {
+        self.string_lens[i]
+    }
+
+    pub fn maximal_matches(&self, i: usize, j: usize) -> Vec<Match> {
+        maximal::find_maximal_matches(self, i, j)
+    }
+
+    pub(crate) fn all_positive_matches(&self, i: usize, j: usize) -> Vec<Match> {
+        maximal::find_all_positive_matches(self, i, j)
+    }
+
+    pub fn lcp_between(&self, a: usize, b: usize) -> usize {
+        if a == b {
+            return self.concat.len() - a;
+        }
+        let ra = self.rank[a];
+        let rb = self.rank[b];
+        let (lo, hi) = if ra < rb { (ra + 1, rb) } else { (rb + 1, ra) };
+        self.rmq.query(lo, hi)
+    }
+
+    fn concatenate(strings: &[Vec<i8>]) -> (Vec<u32>, Vec<usize>, Vec<usize>) {
+        let n = strings.len();
+
+        let mut chars: Vec<i8> = strings.iter().flat_map(|s| s.iter().copied()).collect();
+        chars.sort();
+        chars.dedup();
+
+        let char_offset = n as u32;
+        let char_map: HashMap<i8, u32> = chars
+            .into_iter()
+            .enumerate()
+            .map(|(i, c)| (c, char_offset + i as u32))
+            .collect();
+
+        let total_len: usize = strings.iter().map(|s| s.len()).sum::<usize>() + n;
+        let mut concat = Vec::with_capacity(total_len);
+        let mut boundaries = Vec::with_capacity(n);
+        let mut string_lens = Vec::with_capacity(n);
+
+        for (i, s) in strings.iter().enumerate() {
+            boundaries.push(concat.len());
+            string_lens.push(s.len());
+            for &c in s {
+                concat.push(char_map[&c]);
+            }
+            concat.push((n - 1 - i) as u32);
+        }
+
+        (concat, boundaries, string_lens)
+    }
+
+    fn build_rank(sa: &[usize]) -> Vec<usize> {
+        let mut rank = vec![0usize; sa.len()];
+        for (i, &pos) in sa.iter().enumerate() {
+            rank[pos] = i;
+        }
+        rank
+    }
+
+    fn build_doc_arrays(
+        n: usize,
+        boundaries: &[usize],
+        string_lens: &[usize],
+    ) -> (Vec<usize>, Vec<usize>) {
+        let mut doc = vec![0usize; n];
+        let mut doc_pos = vec![0usize; n];
+        for (sid, (&start, &len)) in boundaries.iter().zip(string_lens.iter()).enumerate() {
+            for i in 0..=len {
+                doc[start + i] = sid;
+                doc_pos[start + i] = i;
+            }
+        }
+        (doc, doc_pos)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn single_string() {
+        let idx = MatchIndex::new(&[vec![1i8, 2, 3]]);
+        assert_eq!(idx.num_strings(), 1);
+        assert_eq!(idx.string_len(0), 3);
+    }
+
+    #[test]
+    fn two_strings() {
+        let idx = MatchIndex::new(&[vec![1i8, 2], vec![3i8, 4]]);
+        assert_eq!(idx.num_strings(), 2);
+        assert_eq!(idx.string_len(0), 2);
+        assert_eq!(idx.string_len(1), 2);
+    }
+
+    #[test]
+    fn empty_strings() {
+        let idx = MatchIndex::new(&[vec![], vec![1i8], vec![]]);
+        assert_eq!(idx.num_strings(), 3);
+        assert_eq!(idx.string_len(0), 0);
+        assert_eq!(idx.string_len(1), 1);
+        assert_eq!(idx.string_len(2), 0);
+    }
+
+    #[test]
+    fn concat_last_is_sentinel_zero() {
+        let idx = MatchIndex::new(&[vec![1i8, 2], vec![3i8]]);
+        assert_eq!(*idx.concat.last().unwrap(), 0);
+    }
+
+    #[test]
+    fn concat_sentinels_unique_and_below_chars() {
+        let strings: Vec<Vec<i8>> = vec![vec![5i8, 6], vec![7i8], vec![8i8, 9, 10]];
+        let idx = MatchIndex::new(&strings);
+        let n = strings.len();
+        let sentinel_positions: Vec<usize> = strings
+            .iter()
+            .enumerate()
+            .map(|(i, s)| idx.boundaries[i] + s.len())
+            .collect();
+        let sentinels: Vec<u32> = sentinel_positions.iter().map(|&p| idx.concat[p]).collect();
+        let char_offset = n as u32;
+        for &s in &sentinels {
+            assert!(s < char_offset, "sentinel {s} >= char_offset {char_offset}");
+        }
+        let char_values: Vec<u32> = idx
+            .concat
+            .iter()
+            .enumerate()
+            .filter(|(i, _)| !sentinel_positions.contains(i))
+            .map(|(_, &v)| v)
+            .collect();
+        for &c in &char_values {
+            assert!(c >= char_offset, "char {c} < char_offset {char_offset}");
+        }
+    }
+
+    #[test]
+    fn rank_is_inverse_of_sa() {
+        let idx = MatchIndex::new(&[vec![1i8, 2, 1], vec![2i8, 1, 2]]);
+        for (i, &pos) in idx.sa.iter().enumerate() {
+            assert_eq!(idx.rank[pos], i);
+        }
+        for i in 0..idx.concat.len() {
+            assert_eq!(idx.sa[idx.rank[i]], i);
+        }
+    }
+
+    #[test]
+    fn doc_arrays_correct() {
+        let strings: Vec<Vec<i8>> = vec![vec![1i8, 2], vec![3i8]];
+        let idx = MatchIndex::new(&strings);
+        assert_eq!(idx.doc[idx.boundaries[0]], 0);
+        assert_eq!(idx.doc[idx.boundaries[0] + 1], 0);
+        assert_eq!(idx.doc[idx.boundaries[1]], 1);
+        assert_eq!(idx.doc_pos[idx.boundaries[0]], 0);
+        assert_eq!(idx.doc_pos[idx.boundaries[0] + 1], 1);
+        assert_eq!(idx.doc_pos[idx.boundaries[1]], 0);
+    }
+
+    #[test]
+    fn lcp_between_correct() {
+        let strings: Vec<Vec<i8>> = vec![vec![1i8, 2, 3], vec![2i8, 3, 4]];
+        let idx = MatchIndex::new(&strings);
+        let pos_23_in_0 = idx.boundaries[0] + 1;
+        let pos_23_in_1 = idx.boundaries[1];
+        assert_eq!(idx.lcp_between(pos_23_in_0, pos_23_in_1), 2);
+    }
+}
diff --git a/src/stringmatch/rmq.rs b/src/stringmatch/rmq.rs
new file mode 100644
index 0000000..e87eb71
--- /dev/null
+++ b/src/stringmatch/rmq.rs
@@ -0,0 +1,154 @@
+pub struct SparseTable {
+    table: Vec<Vec<usize>>,
+    log: Vec<usize>,
+}
+
+impl SparseTable {
+    pub fn new(data: &[usize]) -> Self {
+        let n = data.len();
+        if n == 0 {
+            return SparseTable {
+                table: vec![],
+                log: vec![],
+            };
+        }
+
+        let max_k = floor_log2(n);
+        let mut table = Vec::with_capacity(max_k + 1);
+
+        table.push(data.to_vec());
+
+        for k in 1..=max_k {
+            let half = 1 << (k - 1);
+            let prev = &table[k - 1];
+            let len = n.saturating_sub(half);
+            let mut row = vec![0usize; n];
+            for i in 0..len {
+                row[i] = prev[i].min(prev[i + half]);
+            }
+            row[len..n].copy_from_slice(&prev[len..n]);
+            table.push(row);
+        }
+
+        let mut log = vec![0usize; n + 1];
+        for i in 2..=n {
+            log[i] = log[i / 2] + 1;
+        }
+
+        SparseTable { table, log }
+    }
+
+    pub fn query(&self, l: usize, r: usize) -> usize {
+        assert!(l <= r, "query requires l <= r");
+        let k = self.log[r - l + 1];
+        self.table[k][l].min(self.table[k][r + 1 - (1 << k)])
+    }
+}
+
+fn floor_log2(n: usize) -> usize {
+    if n == 0 {
+        return 0;
+    }
+    (usize::BITS - 1 - n.leading_zeros()) as usize
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn naive_rmq(data: &[usize], l: usize, r: usize) -> usize {
+        data[l..=r].iter().copied().min().unwrap()
+    }
+
+    #[test]
+    fn empty() {
+        let st = SparseTable::new(&[]);
+        assert!(st.table.is_empty());
+    }
+
+    #[test]
+    fn single_element() {
+        let st = SparseTable::new(&[42]);
+        assert_eq!(st.query(0, 0), 42);
+    }
+
+    #[test]
+    fn all_same() {
+        let data = vec![5, 5, 5, 5, 5];
+        let st = SparseTable::new(&data);
+        for l in 0..data.len() {
+            for r in l..data.len() {
+                assert_eq!(st.query(l, r), 5);
+            }
+        }
+    }
+
+    #[test]
+    fn ascending() {
+        let data: Vec<usize> = (0..10).collect();
+        let st = SparseTable::new(&data);
+        for l in 0..data.len() {
+            for r in l..data.len() {
+                assert_eq!(st.query(l, r), l, "rmq({l}, {r})");
+            }
+        }
+    }
+
+    #[test]
+    fn descending() {
+        let data: Vec<usize> = (0..10).rev().collect();
+        let st = SparseTable::new(&data);
+        for l in 0..data.len() {
+            for r in l..data.len() {
+                assert_eq!(st.query(l, r), naive_rmq(&data, l, r), "rmq({l}, {r})");
+            }
+        }
+    }
+
+    #[test]
+    fn all_pairs_vs_naive() {
+        let data = vec![3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5];
+        let st = SparseTable::new(&data);
+        for l in 0..data.len() {
+            for r in l..data.len() {
+                assert_eq!(st.query(l, r), naive_rmq(&data, l, r), "rmq({l}, {r})");
+            }
+        }
+    }
+
+    #[test]
+    fn power_of_two_length() {
+        let data: Vec<usize> = (0..16).collect();
+        let st = SparseTable::new(&data);
+        assert_eq!(st.query(0, 15), 0);
+        assert_eq!(st.query(7, 8), 7);
+        assert_eq!(st.query(15, 15), 15);
+    }
+
+    #[test]
+    fn random_vs_naive() {
+        let mut s: u64 = 31415;
+        let data: Vec<usize> = (0..100)
+            .map(|_| {
+                s = s
+                    .wrapping_mul(6364136223846793005)
+                    .wrapping_add(1442695040888963407);
+                (s % 1000) as usize
+            })
+            .collect();
+        let st = SparseTable::new(&data);
+
+        let mut s2: u64 = 27182;
+        for _ in 0..500 {
+            s2 = s2
+                .wrapping_mul(6364136223846793005)
+                .wrapping_add(1442695040888963407);
+            let l = (s2 as usize) % data.len();
+            s2 = s2
+                .wrapping_mul(6364136223846793005)
+                .wrapping_add(1442695040888963407);
+            let r = l + ((s2 as usize) % (data.len() - l));
+            assert_eq!(st.query(l, r), naive_rmq(&data, l, r), "rmq({l}, {r})");
+        }
+    }
+}
diff --git a/src/stringmatch/sais.rs b/src/stringmatch/sais.rs
new file mode 100644
index 0000000..ce0f45f
--- /dev/null
+++ b/src/stringmatch/sais.rs
@@ -0,0 +1,409 @@
+//! SA-IS (Suffix Array Induced Sorting) for integer alphabets.
+//!
+//! Reference: Nong, Zhang, Chan -
+//! "Two Efficient Algorithms for Linear Time Suffix Array Construction" (2011)
+//!
+//! Input: a sequence of `u32` values in `[0, K)` where `K` is the alphabet size.
+//! The last element **must** be `0` (the sentinel, which is the unique minimum).
+
+const EMPTY: usize = usize::MAX;
+
+pub fn build_suffix_array(text: &[u32]) -> Vec<usize> {
+    assert!(!text.is_empty(), "text must not be empty");
+    assert_eq!(
+        *text.last().unwrap(),
+        0,
+        "last element must be sentinel (0)"
+    );
+
+    let n = text.len();
+    if n == 1 {
+        return vec![0];
+    }
+
+    let alphabet_size = (*text.iter().max().unwrap()) as usize + 1;
+    let mut sa = vec![EMPTY; n];
+    sais(text, &mut sa, alphabet_size);
+    sa
+}
+
+fn sais(text: &[u32], sa: &mut [usize], alphabet_size: usize) {
+    let n = text.len();
+
+    if n == 1 {
+        sa[0] = 0;
+        return;
+    }
+
+    let stypes = classify_types(text);
+
+    sa.fill(EMPTY);
+    place_lms_right_to_left(text, sa, &stypes, alphabet_size);
+    induce_l(text, sa, &stypes, alphabet_size);
+    induce_s(text, sa, &stypes, alphabet_size);
+
+    let n1 = compact_sorted_lms(sa, &stypes);
+    let sorted_lms = &sa[..n1];
+
+    let (names, num_names) = name_lms_substrings(text, &stypes, sorted_lms);
+
+    let mut pos_to_name = vec![0u32; n];
+    for j in 0..n1 {
+        pos_to_name[sorted_lms[j]] = names[j];
+    }
+
+    let lms_in_order: Vec<usize> = (0..n).filter(|&i| is_lms(&stypes, i)).collect();
+    let s1: Vec<u32> = lms_in_order.iter().map(|&p| pos_to_name[p]).collect();
+
+    let sorted_lms_final = if num_names as usize == n1 {
+        sorted_lms.to_vec()
+    } else {
+        let k1 = num_names as usize;
+        let mut sa1 = vec![EMPTY; n1];
+        sais(&s1, &mut sa1, k1);
+        sa1.iter().map(|&rank| lms_in_order[rank]).collect()
+    };
+
+    sa.fill(EMPTY);
+    place_lms_sorted(text, sa, alphabet_size, &sorted_lms_final);
+    induce_l(text, sa, &stypes, alphabet_size);
+    induce_s(text, sa, &stypes, alphabet_size);
+}
+
+fn classify_types(text: &[u32]) -> Vec<bool> {
+    let n = text.len();
+    let mut stypes = vec![false; n];
+    stypes[n - 1] = true;
+    for i in (0..n - 1).rev() {
+        if text[i] < text[i + 1] {
+            stypes[i] = true;
+        } else if text[i] == text[i + 1] {
+            stypes[i] = stypes[i + 1];
+        }
+    }
+    stypes
+}
+
+fn is_lms(stypes: &[bool], i: usize) -> bool {
+    i > 0 && stypes[i] && !stypes[i - 1]
+}
+
+fn place_lms_right_to_left(text: &[u32], sa: &mut [usize], stypes: &[bool], alphabet_size: usize) {
+    let mut bk = Buckets::from_text(text, alphabet_size);
+    for i in (1..text.len()).rev() {
+        if is_lms(stypes, i) {
+            let c = text[i] as usize;
+            sa[bk.tail(c)] = i;
+            bk.decr_tail(c);
+        }
+    }
+}
+
+fn place_lms_sorted(text: &[u32], sa: &mut [usize], alphabet_size: usize, sorted: &[usize]) {
+    let mut bk = Buckets::from_text(text, alphabet_size);
+    for &pos in sorted.iter().rev() {
+        let c = text[pos] as usize;
+        sa[bk.tail(c)] = pos;
+        bk.decr_tail(c);
+    }
+}
+
+fn induce_l(text: &[u32], sa: &mut [usize], stypes: &[bool], alphabet_size: usize) {
+    let n = sa.len();
+    let mut bk = Buckets::from_text(text, alphabet_size);
+    for i in 0..n {
+        if sa[i] == EMPTY {
+            continue;
+        }
+        let j = sa[i];
+        if j == 0 {
+            continue;
+        }
+        if !stypes[j - 1] {
+            let c = text[j - 1] as usize;
+            sa[bk.head(c)] = j - 1;
+            bk.incr_head(c);
+        }
+    }
+}
+
+fn induce_s(text: &[u32], sa: &mut [usize], stypes: &[bool], alphabet_size: usize) {
+    let n = sa.len();
+    let mut bk = Buckets::from_text(text, alphabet_size);
+    for i in (0..n).rev() {
+        if sa[i] == EMPTY {
+            continue;
+        }
+        let j = sa[i];
+        if j == 0 {
+            continue;
+        }
+        if stypes[j - 1] {
+            let c = text[j - 1] as usize;
+            sa[bk.tail(c)] = j - 1;
+            bk.decr_tail(c);
+        }
+    }
+}
+
+fn compact_sorted_lms(sa: &mut [usize], stypes: &[bool]) -> usize {
+    let n = sa.len();
+    let mut n1 = 0;
+    for i in 0..n {
+        if sa[i] != EMPTY && is_lms(stypes, sa[i]) {
+            sa[n1] = sa[i];
+            n1 += 1;
+        }
+    }
+    for slot in sa.iter_mut().take(n).skip(n1) {
+        *slot = EMPTY;
+    }
+    n1
+}
+
+fn name_lms_substrings(text: &[u32], stypes: &[bool], sorted_lms: &[usize]) -> (Vec<u32>, u32) {
+    let n1 = sorted_lms.len();
+    let mut names = vec![0u32; n1];
+    let mut num_names = 1u32;
+    for j in 1..n1 {
+        if !lms_substrings_equal(text, stypes, sorted_lms[j - 1], sorted_lms[j]) {
+            num_names += 1;
+        }
+        names[j] = num_names - 1;
+    }
+    (names, num_names)
+}
+
+fn lms_substrings_equal(text: &[u32], stypes: &[bool], p1: usize, p2: usize) -> bool {
+    let n = text.len();
+    let mut d = 0;
+    loop {
+        let a = p1 + d;
+        let b = p2 + d;
+        if a >= n || b >= n {
+            return a >= n && b >= n;
+        }
+        if text[a] != text[b] || stypes[a] != stypes[b] {
+            return false;
+        }
+        if d > 0 && is_lms(stypes, a) {
+            return is_lms(stypes, b);
+        }
+        d += 1;
+    }
+}
+
+struct Buckets {
+    head: Vec<usize>,
+    tail: Vec<usize>,
+}
+
+impl Buckets {
+    fn from_text(text: &[u32], alphabet_size: usize) -> Self {
+        let mut count = vec![0usize; alphabet_size];
+        for &c in text {
+            count[c as usize] += 1;
+        }
+        let mut head = vec![0usize; alphabet_size];
+        let mut tail = vec![0usize; alphabet_size];
+        let mut sum = 0;
+        for i in 0..alphabet_size {
+            head[i] = sum;
+            sum += count[i];
+            tail[i] = sum.saturating_sub(1);
+        }
+        Buckets { head, tail }
+    }
+
+    fn head(&self, c: usize) -> usize {
+        self.head[c]
+    }
+
+    fn tail(&self, c: usize) -> usize {
+        self.tail[c]
+    }
+
+    fn incr_head(&mut self, c: usize) {
+        self.head[c] += 1;
+    }
+
+    fn decr_tail(&mut self, c: usize) {
+        self.tail[c] = self.tail[c].saturating_sub(1);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn naive_sa(text: &[u32]) -> Vec<usize> {
+        let mut sa: Vec<usize> = (0..text.len()).collect();
+        sa.sort_by(|&a, &b| text[a..].cmp(&text[b..]));
+        sa
+    }
+
+    #[test]
+    fn single_sentinel() {
+        assert_eq!(build_suffix_array(&[0]), vec![0]);
+    }
+
+    #[test]
+    fn two_chars() {
+        assert_eq!(build_suffix_array(&[1, 0]), vec![1, 0]);
+    }
+
+    #[test]
+    fn banana() {
+        let text: Vec<u32> = vec![2, 1, 3, 1, 3, 1, 0];
+        assert_eq!(build_suffix_array(&text), vec![6, 5, 3, 1, 0, 4, 2]);
+    }
+
+    #[test]
+    fn all_same() {
+        assert_eq!(build_suffix_array(&[1, 1, 1, 0]), vec![3, 2, 1, 0]);
+    }
+
+    #[test]
+    fn already_sorted() {
+        let text: Vec<u32> = vec![1, 2, 3, 0];
+        let sa = build_suffix_array(&text);
+        assert_eq!(sa, naive_sa(&text));
+    }
+
+    #[test]
+    fn reverse_sorted() {
+        let text: Vec<u32> = vec![3, 2, 1, 0];
+        let sa = build_suffix_array(&text);
+        assert_eq!(sa, naive_sa(&text));
+    }
+
+    #[test]
+    fn ababab() {
+        let text: Vec<u32> = vec![1, 2, 1, 2, 1, 2, 0];
+        let sa = build_suffix_array(&text);
+        assert_eq!(sa, naive_sa(&text));
+    }
+
+    #[test]
+    fn descending_stairs() {
+        let text: Vec<u32> = vec![4, 3, 2, 1, 0];
+        assert_eq!(build_suffix_array(&text), vec![4, 3, 2, 1, 0]);
+    }
+
+    #[test]
+    fn ascending_stairs() {
+        let text: Vec<u32> = vec![1, 2, 3, 4, 0];
+        assert_eq!(build_suffix_array(&text), naive_sa(&text));
+    }
+
+    #[test]
+    fn two_equal_blocks() {
+        let text: Vec<u32> = vec![1, 2, 3, 1, 2, 3, 0];
+        assert_eq!(build_suffix_array(&text), naive_sa(&text));
+    }
+
+    #[test]
+    fn single_char_before_sentinel() {
+        for c in [1u32, 2, 5, 10] {
+            let text = vec![c, 0];
+            assert_eq!(build_suffix_array(&text), vec![1, 0], "c={c}");
+        }
+    }
+
+    #[test]
+    fn long_repeated_pattern() {
+        let mut text = vec![];
+        for _ in 0..20 {
+            text.push(1u32);
+            text.push(2u32);
+        }
+        text.push(0);
+        assert_eq!(build_suffix_array(&text), naive_sa(&text));
+    }
+
+    #[test]
+    fn every_char_different() {
+        let text: Vec<u32> = vec![5, 3, 1, 4, 2, 0];
+        assert_eq!(build_suffix_array(&text), naive_sa(&text));
+    }
+
+    #[test]
+    fn vs_naive_random_small() {
+        let seeds = [42u64, 137, 999, 0, u64::MAX];
+        for seed in seeds {
+            let mut s = seed;
+            for n in [2, 3, 5, 7, 13, 21, 37] {
+                let text: Vec<u32> = (0..n)
+                    .map(|i| {
+                        if i == n - 1 {
+                            0
+                        } else {
+                            s = s
+                                .wrapping_mul(6364136223846793005)
+                                .wrapping_add(1442695040888963407);
+                            (s % 5) as u32 + 1
+                        }
+                    })
+                    .collect();
+                let sais_sa = build_suffix_array(&text);
+                let naive = naive_sa(&text);
+                assert_eq!(sais_sa, naive, "seed={seed}, n={n}");
+            }
+        }
+    }
+
+    #[test]
+    fn vs_naive_random_medium() {
+        let mut s: u64 = 12345;
+        for _ in 0..50 {
+            s = s
+                .wrapping_mul(6364136223846793005)
+                .wrapping_add(1442695040888963407);
+            let n = (s as usize % 80) + 5;
+            let text: Vec<u32> = (0..n)
+                .map(|i| {
+                    if i == n - 1 {
+                        0
+                    } else {
+                        s = s
+                            .wrapping_mul(6364136223846793005)
+                            .wrapping_add(1442695040888963407);
+                        (s % 10) as u32 + 1
+                    }
+                })
+                .collect();
+            let sais_sa = build_suffix_array(&text);
+            let naive = naive_sa(&text);
+            assert_eq!(sais_sa, naive);
+        }
+    }
+
+    #[test]
+    fn classify_types_known() {
+        let text: Vec<u32> = vec![2, 1, 3, 1, 3, 1, 0];
+        let stypes = classify_types(&text);
+        assert_eq!(stypes, vec![false, true, false, true, false, false, true]);
+    }
+
+    #[test]
+    fn lms_positions_banana() {
+        let text: Vec<u32> = vec![2, 1, 3, 1, 3, 1, 0];
+        let stypes = classify_types(&text);
+        let lms: Vec<usize> = (0..text.len()).filter(|&i| is_lms(&stypes, i)).collect();
+        assert_eq!(lms, vec![1, 3, 6]);
+    }
+
+    #[test]
+    fn buckets_correct() {
+        let text: Vec<u32> = vec![2, 1, 3, 1, 3, 1, 0];
+        let bk = Buckets::from_text(&text, 4);
+        assert_eq!(bk.head(0), 0);
+        assert_eq!(bk.tail(0), 0);
+        assert_eq!(bk.head(1), 1);
+        assert_eq!(bk.tail(1), 3);
+        assert_eq!(bk.head(2), 4);
+        assert_eq!(bk.tail(2), 4);
+        assert_eq!(bk.head(3), 5);
+        assert_eq!(bk.tail(3), 6);
+    }
+}