Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
package io.github.dfa1.vortex.performance;

import io.github.dfa1.vortex.core.DType;
import io.github.dfa1.vortex.core.PType;
import io.github.dfa1.vortex.reader.array.Array;
import io.github.dfa1.vortex.reader.array.DoubleArray;
import io.github.dfa1.vortex.reader.array.FloatArray;
import io.github.dfa1.vortex.reader.array.IntArray;
import io.github.dfa1.vortex.reader.array.LazyRleByteArray;
import io.github.dfa1.vortex.reader.array.LazyRleIntArray;
import io.github.dfa1.vortex.reader.array.LazyRleLongArray;
import io.github.dfa1.vortex.reader.array.LazySparseDoubleArray;
import io.github.dfa1.vortex.reader.array.LazySparseFloatArray;
import io.github.dfa1.vortex.reader.array.LazySparseIntArray;
import io.github.dfa1.vortex.reader.array.LazySparseLongArray;
import io.github.dfa1.vortex.reader.array.LongArray;
import io.github.dfa1.vortex.reader.array.MaterializedDoubleArray;
import io.github.dfa1.vortex.reader.array.MaterializedFloatArray;
import io.github.dfa1.vortex.reader.array.MaterializedIntArray;
import io.github.dfa1.vortex.reader.array.MaterializedLongArray;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.util.concurrent.TimeUnit;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;

/// Microbenchmark for the `Lazy{Sparse,Rle}` walk paths, measured through
/// their `fold` entry points.
///
/// Exercises every value-type whose walk was hoisted into the shared
/// `SparseArrays.walkPatches` / `RleArrays.walkRuns` helpers, all in one JVM so
/// the shared walker call sites see multiple lambda implementations — the
/// megamorphic condition that a regression (lost inlining / vectorization on
/// the fill / constant-run loops) would show up under. Compare results against
/// the pre-refactor inlined implementations on `main`.
///
/// All off-heap fixtures are allocated from a single shared [Arena] created in
/// [#setup()] and released in [#tearDown()].
@State(Scope.Benchmark)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@Warmup(iterations = 3, time = 2)
@Measurement(iterations = 5, time = 2)
@Fork(1)
public class LazyArrayWalkBenchmark {

    /// Logical row count of every benchmarked array.
    private static final int ROWS = 1_000_000;
    /// Number of 1024-row chunks needed to cover `ROWS` (rounded up).
    private static final int NUM_CHUNKS = (ROWS + 1023) / 1024;
    /// Number of patch entries in each sparse fixture.
    private static final int SPARSE_PATCHES = 1_000;

    private static final DType I32 = new DType.Primitive(PType.I32, false);
    private static final DType I64 = new DType.Primitive(PType.I64, false);
    private static final DType I8 = new DType.Primitive(PType.I8, false);
    private static final DType F64 = new DType.Primitive(PType.F64, false);
    private static final DType F32 = new DType.Primitive(PType.F32, false);

    /// Owns every off-heap segment backing the fixtures; closed in [#tearDown()].
    private Arena arena;

    private LazySparseIntArray sparseInt;
    private LazySparseLongArray sparseLong;
    private LazySparseDoubleArray sparseDouble;
    private LazySparseFloatArray sparseFloat;

    private LazyRleIntArray rleIntConst;
    private LazyRleIntArray rleIntMulti;
    private LazyRleLongArray rleLongMulti;
    private LazyRleByteArray rleByteMulti;

    /// Builds all sparse and RLE fixtures once per trial.
    @Setup
    public void setup() {
        arena = Arena.ofShared();

        // Sparse: mostly fill, few patches — stresses the fill-emission walk.
        // Patch positions are spread evenly: one every ROWS / SPARSE_PATCHES rows.
        int[] patchIdx = new int[SPARSE_PATCHES];
        for (int k = 0; k < SPARSE_PATCHES; k++) {
            patchIdx[k] = k * (ROWS / SPARSE_PATCHES);
        }
        // The same index array is shared by all four sparse fixtures.
        Array idxArr = intArray(patchIdx);
        sparseInt = new LazySparseIntArray(I32, ROWS, 7, intArrayValues(SPARSE_PATCHES), idxArr, 0L);
        sparseLong = new LazySparseLongArray(I64, ROWS, 7L, longArrayValues(SPARSE_PATCHES), idxArr, 0L);
        sparseDouble = new LazySparseDoubleArray(F64, ROWS, 1.5, doubleArrayValues(SPARSE_PATCHES), idxArr, 0L);
        sparseFloat = new LazySparseFloatArray(F32, ROWS, 1.5f, floatArrayValues(SPARSE_PATCHES), idxArr, 0L);

        // RLE constant runs: one distinct value per chunk — constant fast path.
        long[] constOffsets = new long[NUM_CHUNKS];
        int[] constValuesI = new int[NUM_CHUNKS];
        for (int c = 0; c < NUM_CHUNKS; c++) {
            constOffsets[c] = c;
            constValuesI[c] = c;
        }
        // The per-row index array is left all-zero, so every row in a chunk
        // refers to that chunk's single value.
        rleIntConst = new LazyRleIntArray(I32, ROWS, constValuesI, new int[NUM_CHUNKS * 1024],
                constOffsets, 0L, NUM_CHUNKS, NUM_CHUNKS, 0);

        // RLE multi-value: 4 distinct values per chunk — per-row emit path.
        int valsPerChunk = 4;
        long[] multiOffsets = new long[NUM_CHUNKS];
        int[] multiValuesI = new int[NUM_CHUNKS * valsPerChunk];
        long[] multiValuesL = new long[NUM_CHUNKS * valsPerChunk];
        byte[] multiValuesB = new byte[NUM_CHUNKS * valsPerChunk];
        for (int c = 0; c < NUM_CHUNKS; c++) {
            multiOffsets[c] = (long) c * valsPerChunk;
            for (int j = 0; j < valsPerChunk; j++) {
                multiValuesI[c * valsPerChunk + j] = c * 10 + j;
                multiValuesL[c * valsPerChunk + j] = c * 10L + j;
                multiValuesB[c * valsPerChunk + j] = (byte) (c + j);
            }
        }
        int[] multiIndices = new int[NUM_CHUNKS * 1024];
        for (int r = 0; r < multiIndices.length; r++) {
            // valsPerChunk is a power of two, so the mask cycles 0..valsPerChunk-1.
            multiIndices[r] = r & (valsPerChunk - 1);
        }
        long valuesLen = (long) NUM_CHUNKS * valsPerChunk;
        rleIntMulti = new LazyRleIntArray(I32, ROWS, multiValuesI, multiIndices,
                multiOffsets, 0L, valuesLen, NUM_CHUNKS, 0);
        rleLongMulti = new LazyRleLongArray(I64, ROWS, multiValuesL, multiIndices,
                multiOffsets, 0L, valuesLen, NUM_CHUNKS, 0);
        rleByteMulti = new LazyRleByteArray(I8, ROWS, multiValuesB, multiIndices,
                multiOffsets, 0L, valuesLen, NUM_CHUNKS, 0, false);
    }

    /// Releases the off-heap memory allocated by [#setup()].
    ///
    /// A shared arena is only freed by an explicit `close()`; without this the
    /// segments would stay allocated until the forked JVM exits.
    @TearDown
    public void tearDown() {
        if (arena != null) {
            arena.close();
            arena = null;
        }
    }

    /// Sums the sparse int array through `fold`.
    @Benchmark
    public int sparseFoldInt() {
        return sparseInt.fold(0, Integer::sum);
    }

    /// Sums the sparse long array through `fold`.
    @Benchmark
    public long sparseFoldLong() {
        return sparseLong.fold(0L, Long::sum);
    }

    /// Sums the sparse double array through `fold`.
    @Benchmark
    public double sparseFoldDouble() {
        return sparseDouble.fold(0.0, Double::sum);
    }

    /// Sums the sparse float array through `fold`, accumulating in a `double`.
    @Benchmark
    public double sparseFoldFloat() {
        return sparseFloat.fold(0.0, Double::sum);
    }

    /// Sums the one-value-per-chunk RLE int array through `fold`.
    @Benchmark
    public int rleFoldIntConst() {
        return rleIntConst.fold(0, Integer::sum);
    }

    /// Sums the four-values-per-chunk RLE int array through `fold`.
    @Benchmark
    public int rleFoldIntMulti() {
        return rleIntMulti.fold(0, Integer::sum);
    }

    /// Sums the four-values-per-chunk RLE long array through `fold`.
    @Benchmark
    public long rleFoldLongMulti() {
        return rleLongMulti.fold(0L, Long::sum);
    }

    /// Sums the four-values-per-chunk RLE byte array through `fold`, accumulating in a `long`.
    @Benchmark
    public long rleFoldByteMulti() {
        return rleByteMulti.fold(0L, Long::sum);
    }

    /// Returns an arena-backed int array holding `0, 1, …, n - 1`.
    private IntArray intArrayValues(int n) {
        int[] vs = new int[n];
        for (int i = 0; i < n; i++) {
            vs[i] = i;
        }
        return intArray(vs);
    }

    /// Copies `vs` into a 4-byte-aligned arena segment and wraps it read-only.
    private IntArray intArray(int[] vs) {
        MemorySegment seg = arena.allocate(vs.length * 4L, 4);
        for (int i = 0; i < vs.length; i++) {
            seg.setAtIndex(ValueLayout.JAVA_INT, i, vs[i]);
        }
        return new MaterializedIntArray(I32, vs.length, seg.asReadOnly());
    }

    /// Returns an arena-backed long array holding `0, 1, …, n - 1`.
    private LongArray longArrayValues(int n) {
        MemorySegment seg = arena.allocate(n * 8L, 8);
        for (int i = 0; i < n; i++) {
            seg.setAtIndex(ValueLayout.JAVA_LONG, i, i);
        }
        return new MaterializedLongArray(I64, n, seg.asReadOnly());
    }

    /// Returns an arena-backed double array holding `i + 0.25` for `i` in `0 … n - 1`.
    private DoubleArray doubleArrayValues(int n) {
        MemorySegment seg = arena.allocate(n * 8L, 8);
        for (int i = 0; i < n; i++) {
            seg.setAtIndex(ValueLayout.JAVA_DOUBLE, i, i + 0.25);
        }
        return new MaterializedDoubleArray(F64, n, seg.asReadOnly());
    }

    /// Returns an arena-backed float array holding `i + 0.25f` for `i` in `0 … n - 1`.
    private FloatArray floatArrayValues(int n) {
        MemorySegment seg = arena.allocate(n * 4L, 4);
        for (int i = 0; i < n; i++) {
            seg.setAtIndex(ValueLayout.JAVA_FLOAT, i, i + 0.25f);
        }
        return new MaterializedFloatArray(F32, n, seg.asReadOnly());
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -52,20 +52,10 @@ public int getInt(long i) {

@Override
public long fold(long identity, LongBinaryOperator op) {
long acc = identity;
long n = length;
long emitted = 0;
int absRow = offset;
int startChunk = absRow >>> RleArrays.FL_LOG2;
for (int chunkIdx = startChunk; chunkIdx < numChunks && emitted < n; chunkIdx++) {
int rowInChunk = absRow - chunkIdx * RleArrays.FL_CHUNK_SIZE;
int end = Math.min(RleArrays.FL_CHUNK_SIZE, rowInChunk + (int) (n - emitted));
acc = foldChunk(chunkIdx, rowInChunk, end, acc, op);
int count = end - rowInChunk;
emitted += count;
absRow += count;
}
return acc;
long[] acc = {identity};
RleArrays.walkChunks(length, offset, numChunks,
(chunkIdx, rowInChunk, end) -> acc[0] = foldChunk(chunkIdx, rowInChunk, end, acc[0], op));
return acc[0];
}

private long widen(byte v) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,18 +42,8 @@ public int getInt(long i) {

@Override
public void forEachInt(IntConsumer c) {
long n = length;
long emitted = 0;
int absRow = offset;
int startChunk = absRow >>> RleArrays.FL_LOG2;
for (int chunkIdx = startChunk; chunkIdx < numChunks && emitted < n; chunkIdx++) {
int rowInChunk = absRow - chunkIdx * RleArrays.FL_CHUNK_SIZE;
int end = Math.min(RleArrays.FL_CHUNK_SIZE, rowInChunk + (int) (n - emitted));
processChunk(chunkIdx, rowInChunk, end, c);
int count = end - rowInChunk;
emitted += count;
absRow += count;
}
RleArrays.walkChunks(length, offset, numChunks,
(chunkIdx, rowInChunk, end) -> processChunk(chunkIdx, rowInChunk, end, c));
}

private void processChunk(int chunkIdx, int rowInChunk, int end, IntConsumer c) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,8 @@ public long getLong(long i) {

@Override
public void forEachLong(LongConsumer c) {
long n = length;
long emitted = 0;
int absRow = offset;
int startChunk = absRow >>> RleArrays.FL_LOG2;
for (int chunkIdx = startChunk; chunkIdx < numChunks && emitted < n; chunkIdx++) {
int rowInChunk = absRow - chunkIdx * RleArrays.FL_CHUNK_SIZE;
int end = Math.min(RleArrays.FL_CHUNK_SIZE, rowInChunk + (int) (n - emitted));
processChunk(chunkIdx, rowInChunk, end, c);
int count = end - rowInChunk;
emitted += count;
absRow += count;
}
RleArrays.walkChunks(length, offset, numChunks,
(chunkIdx, rowInChunk, end) -> processChunk(chunkIdx, rowInChunk, end, c));
}

private void processChunk(int chunkIdx, int rowInChunk, int end, LongConsumer c) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,20 +52,10 @@ public int getInt(long i) {

@Override
public long fold(long identity, LongBinaryOperator op) {
long acc = identity;
long n = length;
long emitted = 0;
int absRow = offset;
int startChunk = absRow >>> RleArrays.FL_LOG2;
for (int chunkIdx = startChunk; chunkIdx < numChunks && emitted < n; chunkIdx++) {
int rowInChunk = absRow - chunkIdx * RleArrays.FL_CHUNK_SIZE;
int end = Math.min(RleArrays.FL_CHUNK_SIZE, rowInChunk + (int) (n - emitted));
acc = foldChunk(chunkIdx, rowInChunk, end, acc, op);
int count = end - rowInChunk;
emitted += count;
absRow += count;
}
return acc;
long[] acc = {identity};
RleArrays.walkChunks(length, offset, numChunks,
(chunkIdx, rowInChunk, end) -> acc[0] = foldChunk(chunkIdx, rowInChunk, end, acc[0], op));
return acc[0];
}

private long widen(short v) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,32 +29,10 @@ public double getDouble(long i) {

@Override
public void forEachDouble(DoubleConsumer c) {
if (patchValues == null) {
for (long r = 0; r < length; r++) {
c.accept(fillValue);
}
return;
}
long numPatches = patchValues.length();
long absStart = offset;
long absEnd = offset + length;
int p = SparseArrays.findFirstAtOrAfter(patchIndices, numPatches, absStart);
long pos = absStart;
while (pos < absEnd && p < numPatches) {
long patchAbs = SparseArrays.readPatchIdx(patchIndices, p);
if (patchAbs >= absEnd) {
break;
}
for (long r = pos; r < patchAbs; r++) {
c.accept(fillValue);
}
c.accept(patchValues.getDouble(p));
pos = patchAbs + 1;
p++;
}
for (long r = pos; r < absEnd; r++) {
c.accept(fillValue);
}
long numPatches = patchValues == null ? 0 : patchValues.length();
SparseArrays.walkPatches(patchIndices, numPatches, offset, offset + length,
() -> c.accept(fillValue),
p -> c.accept(patchValues.getDouble(p)));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,32 +29,10 @@ public float getFloat(long i) {
@Override
public double fold(double identity, DoubleBinaryOperator op) {
double[] acc = {identity};
if (patchValues == null) {
for (long r = 0; r < length; r++) {
acc[0] = op.applyAsDouble(acc[0], fillValue);
}
return acc[0];
}
long numPatches = patchValues.length();
long absStart = offset;
long absEnd = offset + length;
int p = SparseArrays.findFirstAtOrAfter(patchIndices, numPatches, absStart);
long pos = absStart;
while (pos < absEnd && p < numPatches) {
long patchAbs = SparseArrays.readPatchIdx(patchIndices, p);
if (patchAbs >= absEnd) {
break;
}
for (long r = pos; r < patchAbs; r++) {
acc[0] = op.applyAsDouble(acc[0], fillValue);
}
acc[0] = op.applyAsDouble(acc[0], patchValues.getFloat(p));
pos = patchAbs + 1;
p++;
}
for (long r = pos; r < absEnd; r++) {
acc[0] = op.applyAsDouble(acc[0], fillValue);
}
long numPatches = patchValues == null ? 0 : patchValues.length();
SparseArrays.walkPatches(patchIndices, numPatches, offset, offset + length,
() -> acc[0] = op.applyAsDouble(acc[0], fillValue),
p -> acc[0] = op.applyAsDouble(acc[0], patchValues.getFloat(p)));
return acc[0];
}
}
Loading
Loading