diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/NodeRecordTask.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/NodeRecordTask.java index b3089f10a..f0cc4f1bb 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/NodeRecordTask.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/NodeRecordTask.java @@ -21,7 +21,12 @@ import io.github.jbellis.jvector.graph.disk.feature.Feature; import io.github.jbellis.jvector.graph.disk.feature.FeatureId; +import java.io.IOException; +import java.io.UncheckedIOException; import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -46,6 +51,7 @@ class NodeRecordTask implements Callable> { private final int recordSize; private final long baseOffset; // Base file offset for L0 (offsets calculated per-ordinal) private final ByteBuffer buffer; + private final Path filePath; /** * Result of building a node record. @@ -71,7 +77,8 @@ static class Result { Map> featureStateSuppliers, int recordSize, long baseOffset, - ByteBuffer buffer) { + ByteBuffer buffer, + Path filePath) { this.startOrdinal = startOrdinal; this.endOrdinal = endOrdinal; this.ordinalMapper = ordinalMapper; @@ -82,6 +89,7 @@ static class Result { this.recordSize = recordSize; this.baseOffset = baseOffset; this.buffer = buffer; + this.filePath = filePath; } @Override @@ -125,16 +133,28 @@ public List call() throws Exception { } // Write inline features + long featureOffset = fileOffset + Integer.BYTES; // After the ordinal for (var feature : inlineFeatures) { var supplier = featureStateSuppliers.get(feature.id()); if (supplier == null) { - // Write zeros for missing supplier - for (int i = 0; i < feature.featureSize(); i++) { - writer.writeByte(0); + // Read existing data from file to preserve what was written by writeInline() + try (var channel = FileChannel.open(filePath, StandardOpenOption.READ)) { + ByteBuffer readBuffer = ByteBuffer.allocate(feature.featureSize()); + int bytesRead = channel.read(readBuffer, featureOffset); + if (bytesRead != feature.featureSize()) { + throw new IOException(String.format( + "Expected to read %d bytes but got %d at offset %d", + feature.featureSize(), bytesRead, featureOffset)); + } + readBuffer.flip(); + writer.write(readBuffer.array(), 0, feature.featureSize()); + } catch (IOException e) { + throw new UncheckedIOException(e); } } else { feature.writeInline(writer, supplier.apply(originalOrdinal)); } + featureOffset += feature.featureSize(); } // Write neighbors diff --git a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/ParallelGraphWriter.java b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/ParallelGraphWriter.java index 1173b30a8..a17979f7f 100644 --- a/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/ParallelGraphWriter.java +++ b/jvector-base/src/main/java/io/github/jbellis/jvector/graph/disk/ParallelGraphWriter.java @@ -211,7 +211,8 @@ public void writeL0Records(OrdinalMapper ordinalMapper, featureStateSuppliers, recordSize, baseOffset, // Base offset (task calculates per-ordinal offsets) - buffer + buffer, + filePath ); return task.call(); diff --git a/jvector-examples/yaml-configs/autoDefault.yml b/jvector-examples/yaml-configs/autoDefault.yml index c408c150f..0df658a28 100644 --- a/jvector-examples/yaml-configs/autoDefault.yml +++ b/jvector-examples/yaml-configs/autoDefault.yml @@ -1,12 +1,12 @@ version: 6 -dataset: cohere-english-v3-100k +dataset: default construction: outDegree: [32] efConstruction: [100] neighborOverflow: [1.2f] - addHierarchy: [Yes] + addHierarchy: [Yes, No] refineFinalGraph: [Yes] fusedGraph: [No] compression: @@ -18,12 +18,12 @@ construction: centerData: No anisotropicThreshold: -1.0 # optional parameter. By default, anisotropicThreshold=-1 (i.e., no anisotropy) reranking: - - NVQ + - FP useSavedIndexIfExists: Yes search: topKOverquery: - 10: [1.0] + 10: [1.0, 2.0, 5.0] useSearchPruning: [Yes] compression: - type: PQ diff --git a/jvector-examples/yaml-configs/default.yml b/jvector-examples/yaml-configs/default.yml index 74a42f973..934f7b85b 100644 --- a/jvector-examples/yaml-configs/default.yml +++ b/jvector-examples/yaml-configs/default.yml @@ -8,7 +8,7 @@ construction: neighborOverflow: [1.2f] addHierarchy: [Yes] refineFinalGraph: [Yes] - fusedGraph: [No] + fusedGraph: [Yes, No] compression: - type: PQ parameters: @@ -18,7 +18,7 @@ construction: centerData: No anisotropicThreshold: -1.0 # optional parameter. By default, anisotropicThreshold=-1 (i.e., no anisotropy) reranking: - - NVQ + - FP useSavedIndexIfExists: Yes search: