diff --git a/core/src/main/java/org/incenp/obofoundry/sssom/MappingHasher.java b/core/src/main/java/org/incenp/obofoundry/sssom/MappingHasher.java index ee8724d4..955f9d71 100644 --- a/core/src/main/java/org/incenp/obofoundry/sssom/MappingHasher.java +++ b/core/src/main/java/org/incenp/obofoundry/sssom/MappingHasher.java @@ -29,11 +29,13 @@ */ public class MappingHasher { + private final static long FNV64_PRIME = 1099511628211L; + private final static long FNV64_OFFSET = -3750763034362895579L; + // Z-Base32 output alphabet private static char[] ZB32 = { 'y', 'b', 'n', 'd', 'r', 'f', 'g', '8', 'e', 'j', 'k', 'm', 'c', 'p', 'q', 'x', 'o', 't', '1', 'u', 'w', 'i', 's', 'z', 'a', '3', '4', '5', 'h', '7', '6', '9' }; private MessageDigest md; - private boolean useHex; /** * Creates a new instance. @@ -43,19 +45,18 @@ public MappingHasher() { } /** - * Creates a new instances that produces hexadecimal-encoded hash values. + * Creates a new instance that produces fast, FNV64-based hexadecimal-encoded + * hash values. * - * @param useHex If true, this instance will produce hash values - * that are encoded in hexadecimal, rather than in ZBase32 - * encoding. + * @param useFNV If true, this instance will produce hash values + * using the FNV64 function, rather than the SHA2-256 function. */ - public MappingHasher(boolean useHex) { - try { - md = MessageDigest.getInstance("SHA-256"); - } catch ( NoSuchAlgorithmException e ) { - + public MappingHasher(boolean useFNV) { + if ( !useFNV) { + try { + md = MessageDigest.getInstance("SHA-256"); + } catch ( NoSuchAlgorithmException e ) { } } - this.useHex = useHex; } /** @@ -65,40 +66,28 @@ public MappingHasher(boolean useHex) { * @return The unique hash for the mapping. */ public String hash(Mapping mapping) { - byte[] digest; if ( md != null ) { - digest = md.digest(mapping.toSExpr().getBytes(StandardCharsets.UTF_8)); + byte[] digest = md.digest(mapping.toSExpr().getBytes(StandardCharsets.UTF_8)); md.reset(); + return toZBase32(digest); } else { - // SHA2-256 not available? This should probably never happen, but just in case - // we fall back to the built-in Java hash code. - digest = new byte[4]; - int hashcode = mapping.hashCode(); - digest[0] = (byte) (hashcode & 0xFF); - digest[1] = (byte) ((hashcode & 0xFF00) >> 8); - digest[2] = (byte) ((hashcode & 0xFF0000) >> 16); - digest[3] = (byte) ((hashcode & 0xFF000000) >> 24); - } - - return useHex ? toHexadecimal(digest) : toZBase32(digest); - } - - /** - * Encodes a buffer into its hexadecimal representation. - * - * @param digest The input buffer to encode. - * @return The hexadecimal representation of the input buffer. - */ - public static String toHexadecimal(byte[] digest) { - StringBuffer sb = new StringBuffer(); - for ( int i = 0; i < digest.length; i++ ) { - int high = (digest[i] & 0xF0) >> 4; - int low = digest[i] & 0x0F; + // Either SHA2-256 is not available (which should likely never happen), or the + // user specifically asked for FNV64 hashes. + long hash = FNV64_OFFSET; + for ( byte b : mapping.toSExpr().getBytes(StandardCharsets.UTF_8) ) { + hash ^= b; + hash *= FNV64_PRIME; + } - sb.append((char) (high >= 10 ? high - 10 + 'A' : high + '0')); - sb.append((char) (low >= 10 ? low - 10 + 'A' : low + '0')); + StringBuffer sb = new StringBuffer(); + for ( int i = 0; i < 8; i++ ) { + int hi = (byte) ((hash >> (i * 8 + 4)) & 0x0F); + int lo = (byte) ((hash >> (i * 8)) & 0x0F); + sb.append((char) (hi >= 10 ? hi - 10 + 'A' : hi + '0')); + sb.append((char) (lo >= 10 ? lo - 10 + 'A' : lo + '0')); + } + return sb.toString(); } - return sb.toString(); } /** diff --git a/core/src/test/java/org/incenp/obofoundry/sssom/MappingHasherTest.java b/core/src/test/java/org/incenp/obofoundry/sssom/MappingHasherTest.java index 530312e9..b5790f6d 100644 --- a/core/src/test/java/org/incenp/obofoundry/sssom/MappingHasherTest.java +++ b/core/src/test/java/org/incenp/obofoundry/sssom/MappingHasherTest.java @@ -58,9 +58,13 @@ void testHexadecimalEncoding() { m.setConfidence(0.7); m.getExtensions(true).put("PROPERTY", new ExtensionValue(LocalDate.of(2025, 6, 1))); - // Expected hex-encoded SHA2-256 hash of + // Expected hex-encoded FNV64 hash of // "(7:mapping((10:subject_id7:SUBJECT)(9:author_id(6:AUTHOR))(10:confidence3:0.7)(10:extensions((8:PROPERTY10:2025-06-01)))))" - String hash = "D24A2324E03879F2CFB8E713FAB3CBABAFD3B2CA7F010F4AEA307264C5198327"; + // + // Obtained with: + // FNVhash -t 64 "" + // where FNVhash is the test program provided with RFC 9923 + String hash = "D24B0A1BF19449DD"; Assertions.assertEquals(hash, new MappingHasher(true).hash(m)); } } diff --git a/ext/src/main/java/org/incenp/obofoundry/sssom/transform/MappingFormatter.java b/ext/src/main/java/org/incenp/obofoundry/sssom/transform/MappingFormatter.java index 45f8d6f4..c81406d1 100644 --- a/ext/src/main/java/org/incenp/obofoundry/sssom/transform/MappingFormatter.java +++ b/ext/src/main/java/org/incenp/obofoundry/sssom/transform/MappingFormatter.java @@ -75,6 +75,7 @@ public class MappingFormatter { private PrefixManager pfxMgr; private MappingHasher hasher = new MappingHasher(); + private MappingHasher hexHasher = new MappingHasher(true); /** * Sets the prefix manager to use when attempting to resolve a placeholder name @@ -184,6 +185,7 @@ public void setStandardSubstitutions() { placeholders.put("subject_type", (m) -> m.getSubjectType()); placeholders.put("hash", (m) -> hasher.hash(m)); + placeholders.put("hexhash", (m) -> hexHasher.hash(m)); // Don't bother checking if we replaced existing substitutions, always clear the // cache.