diff --git a/core/src/main/java/org/incenp/obofoundry/sssom/MappingHasher.java b/core/src/main/java/org/incenp/obofoundry/sssom/MappingHasher.java
index ee8724d4..955f9d71 100644
--- a/core/src/main/java/org/incenp/obofoundry/sssom/MappingHasher.java
+++ b/core/src/main/java/org/incenp/obofoundry/sssom/MappingHasher.java
@@ -29,11 +29,13 @@
*/
public class MappingHasher {
+ private final static long FNV64_PRIME = 1099511628211L; // 0x100000001B3
+ private final static long FNV64_OFFSET = -3750763034362895579L; // 0xCBF29CE484222325 read as a signed long
+
// Z-Base32 output alphabet
private static char[] ZB32 = { 'y', 'b', 'n', 'd', 'r', 'f', 'g', '8', 'e', 'j', 'k', 'm', 'c', 'p', 'q', 'x', 'o',
't', '1', 'u', 'w', 'i', 's', 'z', 'a', '3', '4', '5', 'h', '7', '6', '9' };
private MessageDigest md;
- private boolean useHex;
/**
* Creates a new instance.
@@ -43,19 +45,18 @@ public MappingHasher() {
}
/**
- * Creates a new instances that produces hexadecimal-encoded hash values.
+ * Creates a new instance that produces either fast, FNV64-based
+ * hexadecimal-encoded hash values or SHA2-256-based ZBase32-encoded ones.
*
- * @param useHex If true, this instance will produce hash values
- * that are encoded in hexadecimal, rather than in ZBase32
- * encoding.
+ * @param useFNV If true, this instance will produce hash values
+ * using the FNV64 function, rather than the SHA2-256 function.
*/
- public MappingHasher(boolean useHex) {
- try {
- md = MessageDigest.getInstance("SHA-256");
- } catch ( NoSuchAlgorithmException e ) {
-
+ public MappingHasher(boolean useFNV) {
+ if ( !useFNV) {
+ try {
+ md = MessageDigest.getInstance("SHA-256");
+ } catch ( NoSuchAlgorithmException e ) { } // deliberately ignored: md stays null and hash() falls back to FNV64
}
- this.useHex = useHex;
}
/**
@@ -65,40 +66,28 @@ public MappingHasher(boolean useHex) {
* @return The unique hash for the mapping.
*/
public String hash(Mapping mapping) {
- byte[] digest;
if ( md != null ) {
- digest = md.digest(mapping.toSExpr().getBytes(StandardCharsets.UTF_8));
+ byte[] digest = md.digest(mapping.toSExpr().getBytes(StandardCharsets.UTF_8));
md.reset();
+ return toZBase32(digest);
} else {
- // SHA2-256 not available? This should probably never happen, but just in case
- // we fall back to the built-in Java hash code.
- digest = new byte[4];
- int hashcode = mapping.hashCode();
- digest[0] = (byte) (hashcode & 0xFF);
- digest[1] = (byte) ((hashcode & 0xFF00) >> 8);
- digest[2] = (byte) ((hashcode & 0xFF0000) >> 16);
- digest[3] = (byte) ((hashcode & 0xFF000000) >> 24);
- }
-
- return useHex ? toHexadecimal(digest) : toZBase32(digest);
- }
-
- /**
- * Encodes a buffer into its hexadecimal representation.
- *
- * @param digest The input buffer to encode.
- * @return The hexadecimal representation of the input buffer.
- */
- public static String toHexadecimal(byte[] digest) {
- StringBuffer sb = new StringBuffer();
- for ( int i = 0; i < digest.length; i++ ) {
- int high = (digest[i] & 0xF0) >> 4;
- int low = digest[i] & 0x0F;
+ // Either SHA2-256 is not available (which should likely never happen), or the
+ // user specifically asked for FNV64 hashes.
+ long hash = FNV64_OFFSET;
+ for ( byte b : mapping.toSExpr().getBytes(StandardCharsets.UTF_8) ) {
+ hash ^= (b & 0xFF); // mask: Java bytes are signed; an unmasked octet >= 0x80 would sign-extend and flip the upper 56 bits
+ hash *= FNV64_PRIME;
+ }
- sb.append((char) (high >= 10 ? high - 10 + 'A' : high + '0'));
- sb.append((char) (low >= 10 ? low - 10 + 'A' : low + '0'));
+ StringBuilder sb = new StringBuilder(16); // 8 bytes, two hex digits each
+ for ( int i = 0; i < 8; i++ ) { // least significant byte of the hash is emitted first
+ int hi = (byte) ((hash >> (i * 8 + 4)) & 0x0F);
+ int lo = (byte) ((hash >> (i * 8)) & 0x0F);
+ sb.append((char) (hi >= 10 ? hi - 10 + 'A' : hi + '0'));
+ sb.append((char) (lo >= 10 ? lo - 10 + 'A' : lo + '0'));
+ }
+ return sb.toString();
}
- return sb.toString();
}
/**
diff --git a/core/src/test/java/org/incenp/obofoundry/sssom/MappingHasherTest.java b/core/src/test/java/org/incenp/obofoundry/sssom/MappingHasherTest.java
index 530312e9..b5790f6d 100644
--- a/core/src/test/java/org/incenp/obofoundry/sssom/MappingHasherTest.java
+++ b/core/src/test/java/org/incenp/obofoundry/sssom/MappingHasherTest.java
@@ -58,9 +58,13 @@ void testHexadecimalEncoding() {
m.setConfidence(0.7);
m.getExtensions(true).put("PROPERTY", new ExtensionValue(LocalDate.of(2025, 6, 1)));
- // Expected hex-encoded SHA2-256 hash of
+ // Expected hex-encoded FNV64 hash of
// "(7:mapping((10:subject_id7:SUBJECT)(9:author_id(6:AUTHOR))(10:confidence3:0.7)(10:extensions((8:PROPERTY10:2025-06-01)))))"
- String hash = "D24A2324E03879F2CFB8E713FAB3CBABAFD3B2CA7F010F4AEA307264C5198327";
+ //
+ // Obtained with:
+ // FNVhash -t 64 "<the S-expression above>"
+ // where FNVhash is the test program provided with RFC 9923
+ String hash = "D24B0A1BF19449DD";
Assertions.assertEquals(hash, new MappingHasher(true).hash(m));
}
}
diff --git a/ext/src/main/java/org/incenp/obofoundry/sssom/transform/MappingFormatter.java b/ext/src/main/java/org/incenp/obofoundry/sssom/transform/MappingFormatter.java
index 45f8d6f4..c81406d1 100644
--- a/ext/src/main/java/org/incenp/obofoundry/sssom/transform/MappingFormatter.java
+++ b/ext/src/main/java/org/incenp/obofoundry/sssom/transform/MappingFormatter.java
@@ -75,6 +75,7 @@ public class MappingFormatter {
private PrefixManager pfxMgr;
private MappingHasher hasher = new MappingHasher();
+ private MappingHasher hexHasher = new MappingHasher(true);
/**
* Sets the prefix manager to use when attempting to resolve a placeholder name
@@ -184,6 +185,7 @@ public void setStandardSubstitutions() {
placeholders.put("subject_type", (m) -> m.getSubjectType());
placeholders.put("hash", (m) -> hasher.hash(m));
+ placeholders.put("hexhash", (m) -> hexHasher.hash(m));
// Don't bother checking if we replaced existing substitutions, always clear the
// cache.