Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 29 additions & 40 deletions core/src/main/java/org/incenp/obofoundry/sssom/MappingHasher.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@
*/
public class MappingHasher {

private final static long FNV64_PRIME = 1099511628211L;
private final static long FNV64_OFFSET = -3750763034362895579L;

// Z-Base32 output alphabet
private static char[] ZB32 = { 'y', 'b', 'n', 'd', 'r', 'f', 'g', '8', 'e', 'j', 'k', 'm', 'c', 'p', 'q', 'x', 'o',
't', '1', 'u', 'w', 'i', 's', 'z', 'a', '3', '4', '5', 'h', '7', '6', '9' };
private MessageDigest md;
private boolean useHex;

/**
* Creates a new instance.
Expand All @@ -43,19 +45,18 @@ public MappingHasher() {
}

/**
* Creates a new instance that produces hexadecimal-encoded hash values.
* Creates a new instance that produces fast, FNV64-based hexadecimal-encoded
* hash values.
*
* @param useHex If <code>true</code>, this instance will produce hash values
* that are encoded in hexadecimal, rather than in ZBase32
* encoding.
* @param useFNV If <code>true</code>, this instance will produce hash values
* using the FNV64 function, rather than the SHA2-256 function.
*/
public MappingHasher(boolean useHex) {
try {
md = MessageDigest.getInstance("SHA-256");
} catch ( NoSuchAlgorithmException e ) {

public MappingHasher(boolean useFNV) {
if ( !useFNV) {
try {
md = MessageDigest.getInstance("SHA-256");
} catch ( NoSuchAlgorithmException e ) { }
}
this.useHex = useHex;
}

/**
Expand All @@ -65,40 +66,28 @@ public MappingHasher(boolean useHex) {
* @return The unique hash for the mapping.
*/
public String hash(Mapping mapping) {
byte[] digest;
if ( md != null ) {
digest = md.digest(mapping.toSExpr().getBytes(StandardCharsets.UTF_8));
byte[] digest = md.digest(mapping.toSExpr().getBytes(StandardCharsets.UTF_8));
md.reset();
return toZBase32(digest);
} else {
// SHA2-256 not available? This should probably never happen, but just in case
// we fall back to the built-in Java hash code.
digest = new byte[4];
int hashcode = mapping.hashCode();
digest[0] = (byte) (hashcode & 0xFF);
digest[1] = (byte) ((hashcode & 0xFF00) >> 8);
digest[2] = (byte) ((hashcode & 0xFF0000) >> 16);
digest[3] = (byte) ((hashcode & 0xFF000000) >> 24);
}

return useHex ? toHexadecimal(digest) : toZBase32(digest);
}

/**
* Encodes a buffer into its hexadecimal representation.
*
* @param digest The input buffer to encode.
* @return The hexadecimal representation of the input buffer.
*/
public static String toHexadecimal(byte[] digest) {
StringBuffer sb = new StringBuffer();
for ( int i = 0; i < digest.length; i++ ) {
int high = (digest[i] & 0xF0) >> 4;
int low = digest[i] & 0x0F;
// Either SHA2-256 is not available (which should likely never happen), or the
// user specifically asked for FNV64 hashes.
long hash = FNV64_OFFSET;
for ( byte b : mapping.toSExpr().getBytes(StandardCharsets.UTF_8) ) {
hash ^= b;
hash *= FNV64_PRIME;
}

sb.append((char) (high >= 10 ? high - 10 + 'A' : high + '0'));
sb.append((char) (low >= 10 ? low - 10 + 'A' : low + '0'));
StringBuffer sb = new StringBuffer();
for ( int i = 0; i < 8; i++ ) {
int hi = (byte) ((hash >> (i * 8 + 4)) & 0x0F);
int lo = (byte) ((hash >> (i * 8)) & 0x0F);
sb.append((char) (hi >= 10 ? hi - 10 + 'A' : hi + '0'));
sb.append((char) (lo >= 10 ? lo - 10 + 'A' : lo + '0'));
}
return sb.toString();
}
return sb.toString();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,13 @@ void testHexadecimalEncoding() {
m.setConfidence(0.7);
m.getExtensions(true).put("PROPERTY", new ExtensionValue(LocalDate.of(2025, 6, 1)));

// Expected hex-encoded SHA2-256 hash of
// Expected hex-encoded FNV64 hash of
// "(7:mapping((10:subject_id7:SUBJECT)(9:author_id(6:AUTHOR))(10:confidence3:0.7)(10:extensions((8:PROPERTY10:2025-06-01)))))"
String hash = "D24A2324E03879F2CFB8E713FAB3CBABAFD3B2CA7F010F4AEA307264C5198327";
//
// Obtained with:
// FNVhash -t 64 "<S-expression>"
// where FNVhash is the test program provided with RFC 9923
String hash = "D24B0A1BF19449DD";
Assertions.assertEquals(hash, new MappingHasher(true).hash(m));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ public class MappingFormatter {

private PrefixManager pfxMgr;
private MappingHasher hasher = new MappingHasher();
private MappingHasher hexHasher = new MappingHasher(true);

/**
* Sets the prefix manager to use when attempting to resolve a placeholder name
Expand Down Expand Up @@ -184,6 +185,7 @@ public void setStandardSubstitutions() {
placeholders.put("subject_type", (m) -> m.getSubjectType());

placeholders.put("hash", (m) -> hasher.hash(m));
placeholders.put("hexhash", (m) -> hexHasher.hash(m));

// Don't bother checking if we replaced existing substitutions, always clear the
// cache.
Expand Down
Loading