Skip to content

Commit 34d69ee

Browse files
Arrow like storage over ByteBuffer to allow dual JVM buffer exchange (#14309)
- #14357 has introduced a _"mean benchmark"_ - that benchmark was slow in _dual JVM mode_ - the goal of this pull request is to speed it up - the idea is to reuse work done by #13904 to - convert `ColumnStorage` from an (inherently slow) `java.lang.reflect.Proxy` provided by the _other JVM_ and ... - make local storage copy when creating a new `Column` - done in 4a052e8 with following std-table API changes: - [Builder.makeLocal](https://github.com/enso-org/enso/pull/14309/files#diff-cc47a2f40d4a3f8c9e0dccc1ac3c2a7d4401d695906a28a147cf0a3373c975a2R102) - [ColumnStorage rawData & rawValidity](https://github.com/enso-org/enso/pull/14309/files#diff-14823bc3b72e198d1b10c9d7bed4374c1f924726e6f52f0ce6598181891d3275R22) - the `sbt "std-benchmarks/benchOnly mean"` benchmark is now [fast in both configurations](#14309 (comment))
1 parent 99916d5 commit 34d69ee

File tree

16 files changed

+425
-88
lines changed

16 files changed

+425
-88
lines changed

app/electron-client/tests/electronTest.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ export async function getNewestProject(page: Page): Promise<Locator> {
154154
const dataCatalogTab = page.getByRole('tab', { name: 'Data Catalog' })
155155
await dataCatalogTab.click()
156156

157+
await expect(page.getByTestId('drive-view')).toBeVisible({ timeout: LOADING_TIMEOUT })
158+
157159
const projects = await page
158160
.getByTestId('drive-view')
159161
.getByText(/New Project \d+/)
@@ -167,6 +169,11 @@ export async function getNewestProject(page: Page): Promise<Locator> {
167169
}),
168170
)
169171

172+
if (numbered.length == 0) {
173+
console.error('No projects found: ' + projects + ", let's again!")
174+
const again: Locator = await getNewestProject(page)
175+
return again
176+
}
170177
return numbered.reduce((a, b) => (a.num > b.num ? a : b)).locator
171178
}
172179

build.sbt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2642,6 +2642,7 @@ lazy val `runtime-language-arrow` =
26422642
javaModuleName := "org.enso.interpreter.arrow",
26432643
inConfig(Compile)(truffleRunOptionsSettings),
26442644
instrumentationSettings,
2645+
customFrgaalJavaCompilerSettings("24"),
26452646
libraryDependencies ++= GraalVM.modules ++ slf4jApi.map(_ % Test) ++ Seq(
26462647
"junit" % "junit" % junitVersion % Test,
26472648
"com.github.sbt" % "junit-interface" % junitIfVersion % Test,
@@ -5289,7 +5290,7 @@ lazy val `std-table` = project
52895290
.in(file("std-bits") / "table")
52905291
.enablePlugins(Antlr4Plugin)
52915292
.settings(
5292-
frgaalJavaCompilerSetting,
5293+
customFrgaalJavaCompilerSettings("24"),
52935294
mockitoAgentSettings,
52945295
autoScalaLibrary := false,
52955296
Compile / compile / compileInputs := (Compile / compile / compileInputs)
@@ -5360,6 +5361,9 @@ lazy val `std-tests` = project
53605361
frgaalJavaCompilerSetting,
53615362
commands += WithDebugCommand.withDebug,
53625363
Test / fork := true,
5364+
Test / javaOptions ++= Seq(
5365+
"-ea"
5366+
),
53635367
autoScalaLibrary := false,
53645368
Compile / compile / compileInputs := (Compile / compile / compileInputs)
53655369
.dependsOn(SPIHelpers.ensureSPIConsistency)
@@ -5371,6 +5375,7 @@ lazy val `std-tests` = project
53715375
)
53725376
.dependsOn(`std-base`)
53735377
.dependsOn(`std-table`)
5378+
.dependsOn(`runtime-language-arrow`)
53745379
.dependsOn(`test-utils`)
53755380

53765381
lazy val `opencv-wrapper` = project

engine/runtime-language-arrow/src/main/java/org/enso/interpreter/arrow/runtime/ByteBufferDirect.java

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -97,13 +97,22 @@ public static ByteBufferDirect fromAddress(long dataAddress, int valueCount, Siz
9797
public static ByteBufferDirect fromAddress(
9898
long dataAddress, long bitmapAddress, int size, SizeInBytes unit) {
9999
var padded = RoundingUtil.forValueCount(size, unit);
100-
ByteBuffer allocated = MemoryUtil.directBuffer(dataAddress, padded.getTotalSizeInBytes());
101-
assert dataAddress + padded.getDataBufferSizeInBytes() == bitmapAddress;
102-
ByteBuffer dataBuffer = allocated.slice(0, padded.getDataBufferSizeInBytes());
103-
dataBuffer.order(ByteOrder.LITTLE_ENDIAN);
104-
ByteBuffer bitmapBuffer =
105-
allocated.slice(dataBuffer.capacity(), padded.getValidityBitmapSizeInBytes());
106-
return new ByteBufferDirect(allocated, dataBuffer, bitmapBuffer);
100+
if (dataAddress + padded.getDataBufferSizeInBytes() == bitmapAddress) {
101+
var allocated = MemoryUtil.directBuffer(dataAddress, padded.getTotalSizeInBytes());
102+
103+
var dataBuffer = allocated.slice(0, padded.getDataBufferSizeInBytes());
104+
dataBuffer.order(ByteOrder.LITTLE_ENDIAN);
105+
var bitmapBuffer =
106+
allocated.slice(dataBuffer.capacity(), padded.getValidityBitmapSizeInBytes());
107+
return new ByteBufferDirect(allocated, dataBuffer, bitmapBuffer);
108+
} else {
109+
assert bitmapAddress != 0;
110+
var dataBuffer = MemoryUtil.directBuffer(dataAddress, padded.getDataBufferSizeInBytes());
111+
dataBuffer.order(ByteOrder.LITTLE_ENDIAN);
112+
var bitmapBuffer =
113+
MemoryUtil.directBuffer(bitmapAddress, padded.getValidityBitmapSizeInBytes());
114+
return new ByteBufferDirect(dataBuffer, dataBuffer, bitmapBuffer);
115+
}
107116
}
108117

109118
@CompilerDirectives.TruffleBoundary
Lines changed: 3 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,9 @@
11
package org.enso.interpreter.arrow.util;
22

3-
import com.oracle.truffle.api.CompilerDirectives;
4-
import java.lang.invoke.MethodHandle;
5-
import java.lang.invoke.MethodHandles;
6-
import java.lang.reflect.Constructor;
3+
import java.lang.foreign.MemorySegment;
74
import java.nio.ByteBuffer;
85

96
public final class MemoryUtil {
10-
11-
private static MethodHandle byteBufferConstr;
12-
13-
static {
14-
ByteBuffer buffer = null;
15-
try {
16-
buffer = ByteBuffer.allocateDirect(1);
17-
Constructor<?> constr = buffer.getClass().getDeclaredConstructor(long.class, long.class);
18-
constr.setAccessible(true);
19-
byteBufferConstr = MethodHandles.lookup().unreflectConstructor(constr);
20-
} catch (NoSuchMethodException | IllegalAccessException e) {
21-
CompilerDirectives.transferToInterpreter();
22-
throw new ExceptionInInitializerError(
23-
"Unable to find a constructor for ByteBuffer created directly from a memory address");
24-
} finally {
25-
if (buffer != null) {
26-
buffer.clear();
27-
}
28-
}
29-
}
30-
317
private MemoryUtil() {}
328

339
/**
@@ -38,17 +14,7 @@ private MemoryUtil() {}
3814
* @return ByteBuffer instance
3915
*/
4016
public static ByteBuffer directBuffer(long address, long capacity) {
41-
if (byteBufferConstr != null) {
42-
try {
43-
return (ByteBuffer) byteBufferConstr.invoke(address, capacity);
44-
} catch (Throwable e) {
45-
CompilerDirectives.transferToInterpreter();
46-
throw new RuntimeException(e);
47-
}
48-
} else {
49-
CompilerDirectives.transferToInterpreter();
50-
throw new RuntimeException(
51-
"constructor for a ByteBuffer created from a memory address is missing");
52-
}
17+
var seg = MemorySegment.ofAddress(address).reinterpret(capacity);
18+
return seg.asByteBuffer();
5319
}
5420
}

std-bits/table/src/main/java/org/enso/table/data/column/builder/Builder.java

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.enso.table.data.column.builder;
22

3+
import java.lang.reflect.Proxy;
34
import java.math.BigDecimal;
45
import java.math.BigInteger;
56
import java.time.LocalDate;
@@ -89,6 +90,63 @@ static <T> ColumnStorage<T> makeEmpty(StorageType<T> storageType, long size) {
8990
return storageType.asTypedStorage(unTyped);
9091
}
9192

93+
/**
94+
* Converts a proxy storage to local storage.
95+
*
96+
* @param <T> type of storage
97+
* @param storage the storage instance, possibly a {@link Proxy#isProxyClass proxy}
98+
* @return either {@code storage} itself, or optimized storage of the same {@link
99+
* ColumnStorage#getType() type} over the same {@link ColumnStorage#addressOfData() data}
100+
*/
101+
@SuppressWarnings("unchecked")
102+
static <T> ColumnStorage<T> makeLocal(ColumnStorage<T> storage) {
103+
var data = storage.addressOfData();
104+
if (data != 0) {
105+
var size = Math.toIntExact(storage.getSize());
106+
var validity = storage.addressOfValidity();
107+
var proxyType = storage.getType();
108+
var localType = StorageType.fromTypeCharAndSize(proxyType.typeChar(), proxyType.size());
109+
var localStorage =
110+
switch (localType) {
111+
case IntegerType type ->
112+
LongBuilder.fromAddress(size, data, validity, type).seal(storage, type);
113+
default -> storage;
114+
};
115+
assert assertSameStorages(storage, localStorage);
116+
return (ColumnStorage<T>) localStorage;
117+
}
118+
return storage;
119+
}
120+
121+
private static boolean assertSameStorages(ColumnStorage<?> s1, ColumnStorage<?> s2) {
122+
var sb = new java.lang.StringBuilder();
123+
if (s1.getSize() != s2.getSize()) {
124+
sb.append("Unexpected size %d != %d\n".formatted(s1.getSize(), s2.getSize()));
125+
}
126+
var t1 = s1.getType();
127+
var t2 = s2.getType();
128+
if (t1.typeChar() != t2.typeChar()) {
129+
sb.append("Unexpected type %s != %s\n".formatted(t1.typeChar(), t2.typeChar()));
130+
}
131+
if (t1.size() != t2.size()) {
132+
sb.append("Unexpected type %d != %d\n".formatted(t1.size(), t2.size()));
133+
}
134+
/*
135+
for (var i = 0L; i < s1.getSize(); i++) {
136+
var elem1 = s1.getItemBoxed(i);
137+
var elem2 = s2.getItemBoxed(i);
138+
if (!Objects.equals(elem1, elem2)) {
139+
sb.append(" at %d, but %s != %s\n".formatted(i, elem1, elem2));
140+
}
141+
if (sb.length() > 1024) {
142+
break;
143+
}
144+
}
145+
*/
146+
assert sb.isEmpty() : sb;
147+
return sb.isEmpty();
148+
}
149+
92150
/**
93151
* Constructs a builder accepting values of a specific type.
94152
*

std-bits/table/src/main/java/org/enso/table/data/column/builder/DoubleBuilder.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ sealed class DoubleBuilder extends NumericBuilder implements BuilderForDouble
2121
protected double[] data;
2222

2323
DoubleBuilder(int initialSize, ProblemAggregator problemAggregator) {
24-
super();
24+
super(initialSize, 0L);
2525
this.data = new double[initialSize];
2626
precisionLossAggregator = new PrecisionLossAggregator(problemAggregator);
2727
}

std-bits/table/src/main/java/org/enso/table/data/column/builder/LongBuilder.java

Lines changed: 69 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
package org.enso.table.data.column.builder;
22

3+
import java.lang.foreign.MemorySegment;
4+
import java.nio.ByteBuffer;
5+
import java.nio.ByteOrder;
6+
import java.nio.LongBuffer;
37
import java.util.Objects;
48
import org.enso.base.polyglot.NumericConverter;
59
import org.enso.table.data.column.storage.ColumnBooleanStorage;
@@ -18,10 +22,16 @@
1822
sealed class LongBuilder extends NumericBuilder implements BuilderForLong, BuilderWithRetyping
1923
permits BoundCheckedIntegerBuilder {
2024
protected final ProblemAggregator problemAggregator;
21-
protected long[] data;
25+
private LongBuffer data;
2226

2327
protected LongBuilder(int initialSize, ProblemAggregator problemAggregator) {
24-
this.data = new long[initialSize];
28+
this(allocBuffer(initialSize, 0), initialSize, 0, problemAggregator);
29+
}
30+
31+
private LongBuilder(
32+
LongBuffer data, int initialSize, long validity, ProblemAggregator problemAggregator) {
33+
super(initialSize, validity);
34+
this.data = data;
2535
this.problemAggregator = problemAggregator;
2636
}
2737

@@ -33,16 +43,47 @@ static LongBuilder make(int initialSize, IntegerType type, ProblemAggregator pro
3343
}
3444
}
3545

46+
static LongBuilder fromAddress(int size, long address, long validity, IntegerType type) {
47+
assert address != 0;
48+
var buf = allocBuffer(size, address);
49+
var builder = new LongBuilder(buf, size, validity, null);
50+
return builder;
51+
}
52+
53+
/**
54+
* Allocates continuous direct memory buffer. First of all there is a validity bit map (padded to
55+
* 8 bytes) followed by the actual data.
56+
*
57+
* @param size the size of buffer to allocate
58+
* @param data address of data to read or {@code 0} to allocate new data
59+
* @return long buffer representing data
60+
*/
61+
private static LongBuffer allocBuffer(int size, long data) {
62+
var wholeDataSize = Long.BYTES * size;
63+
ByteBuffer buf;
64+
if (data == 0L) {
65+
buf = ByteBuffer.allocateDirect(wholeDataSize).order(ByteOrder.LITTLE_ENDIAN);
66+
} else {
67+
var seg = MemorySegment.ofAddress(data).reinterpret(wholeDataSize);
68+
buf = seg.asByteBuffer().order(ByteOrder.LITTLE_ENDIAN);
69+
}
70+
assert buf.capacity() == wholeDataSize;
71+
var lb = buf.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer();
72+
assert lb.capacity() == size;
73+
assert lb.order() == ByteOrder.LITTLE_ENDIAN;
74+
return lb;
75+
}
76+
3677
@Override
3778
protected int getDataSize() {
38-
return data.length;
79+
return data.capacity();
3980
}
4081

4182
@Override
4283
protected void resize(int desiredCapacity) {
43-
long[] newData = new long[desiredCapacity];
44-
int toCopy = Math.min(currentSize, data.length);
45-
System.arraycopy(data, 0, newData, 0, toCopy);
84+
var newData = allocBuffer(desiredCapacity, 0);
85+
int toCopy = Math.min(currentSize, data.capacity());
86+
newData.put(0, data, 0, toCopy);
4687
data = newData;
4788
}
4889

@@ -52,7 +93,7 @@ public void copyDataTo(Object[] items) {
5293
if (!isValid(i)) {
5394
items[i] = null;
5495
} else {
55-
items[i] = data[i];
96+
items[i] = data.get(i);
5697
}
5798
}
5899
}
@@ -95,7 +136,7 @@ public void appendBulkStorage(ColumnStorage<?> storage) {
95136
// A fast path for the same type (or compatible) - no conversions/checks needed.
96137
int n = (int) longStorage.getSize();
97138
ensureFreeSpaceFor(n);
98-
System.arraycopy(longStorage.getData(), 0, data, currentSize, n);
139+
data.put(currentSize, longStorage.getData(), 0, n);
99140
appendValidityMap(longStorage.getValidityMap(), n);
100141
currentSize += n;
101142
} else {
@@ -132,10 +173,11 @@ public void appendBulkStorage(ColumnStorage<?> storage) {
132173
*
133174
* @param value the integer to append
134175
*/
176+
@Override
135177
public LongBuilder appendLong(long value) {
136178
ensureSpaceToAppend();
137179
this.setValid(currentSize);
138-
this.data[currentSize++] = value;
180+
this.data.put(currentSize++, value);
139181
return this;
140182
}
141183

@@ -153,13 +195,13 @@ public long getLong(long index) {
153195
if (index >= currentSize) {
154196
throw new IndexOutOfBoundsException();
155197
} else {
156-
return data[(int) index];
198+
return data.get((int) index);
157199
}
158200
}
159201

160202
@Override
161203
public long getCurrentCapacity() {
162-
return data.length;
204+
return data.capacity();
163205
}
164206

165207
@Override
@@ -171,7 +213,8 @@ public LongBuilder appendNulls(int count) {
171213
@Override
172214
public LongBuilder append(Object o) {
173215
if (o == null) {
174-
return appendNulls(1);
216+
doAppendNulls(1);
217+
return this;
175218
}
176219

177220
Long x = NumericConverter.tryConvertingToLong(o);
@@ -186,6 +229,19 @@ public LongBuilder append(Object o) {
186229

187230
@Override
188231
public ColumnStorage<Long> seal() {
189-
return new LongStorage(data, currentSize, validityMap(), getType());
232+
return seal(null, getType());
233+
}
234+
235+
/**
236+
* Seals this buffer as copy of provided storage.
237+
*
238+
* @param otherStorage storage to copy size from if non-{@code null}
239+
* @param type the type to assign to the created storage
240+
* @return locally copied storage
241+
*/
242+
final LongStorage seal(ColumnStorage<?> otherStorage, IntegerType type) {
243+
var buf = data.asReadOnlyBuffer().position(0).limit(currentSize);
244+
var validity = this.validityMap();
245+
return new LongStorage(buf, validity, type, otherStorage);
190246
}
191247
}

0 commit comments

Comments
 (0)