API is unstable. Not production-ready.
Sriracha is a Go library for privacy-preserving record linkage. It enables institutions to share person records across organizational boundaries without transmitting raw PII. Sriracha provides the building blocks for privacy-first transports. Records are normalized and tokenized with a shared secret, producing tokens that can be compared without exposing the underlying identifiers.
- Deterministic tokenization using HMAC-SHA256
- Probabilistic tokenization with Sørensen–Dice matching
- Optional BLIP and balanced filter defenses against frequency analysis
- Unicode normalization pipeline
- Canonical field set with support for extended schemas
Requires Go 1.24+
go get github.com/ccuetoh/sriracha@v0.1.0

package main
import (
	"fmt"
	"log"

	"github.com/ccuetoh/sriracha"
	"github.com/ccuetoh/sriracha/fieldset"
	"github.com/ccuetoh/sriracha/session"
)
func main() {
secret := []byte("super-secret-key")
s, _ := session.New(secret, fieldset.DefaultFieldSet())
defer s.Destroy()
// Deterministic tokenization
tokA, _ := s.TokenizeDeterministic(sriracha.RawRecord{
sriracha.FieldNameGiven: "Alice",
sriracha.FieldNameFamily: "Smith",
})
tokB, _ := s.TokenizeDeterministic(sriracha.RawRecord{
sriracha.FieldNameGiven: "Alice",
sriracha.FieldNameFamily: "Smith",
})
eq := s.Equal(tokA, tokB)
fmt.Printf("match: %v\n", eq)
// Probabilistic tokenization
bloomA, _ := s.TokenizeProbabilistic(sriracha.RawRecord{
sriracha.FieldNameGiven: "Alice",
sriracha.FieldNameFamily: "Smith",
})
bloomB, _ := s.TokenizeProbabilistic(sriracha.RawRecord{
sriracha.FieldNameGiven: "Alice",
sriracha.FieldNameFamily: "Smyth", // typo
})
result, _ := s.Match(bloomA, bloomB, 0.85)
fmt.Printf("match: %v (score: %.2f)\n", result.IsMatch, result.Score)
}Live history on Bencher.
| Corpus | Records | Pairs | AUROC | Accuracy | Recall |
|---|---|---|---|---|---|
| OpenSanctions | 26 841 | natural cross-source | 0.93 | 0.91 | 0.87 |
| FEBRL4 | 10 000 | synthetic (FEBRL4 noise) | 1.00 | 1.00 | 1.00 |
| NCVR | 8 848 | synthetic (1–2 char edits) | 1.00 | 1.00 | 1.00 |
