apache · costin · Jun 23, 2026 · Jun 23, 2026 · Jun 24, 2026 · Jun 26, 2026
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -279,7 +279,9 @@ Other
 
 API Changes
 ---------------------
-(No changes)
+
+* GITHUB#16286: Introduce BinaryDocValues#binaryValues to help speed up the
+  retrieval of many binary doc values at once. (Costin Leau)
 
 New Features
 ---------------------

diff --git a/...mark-jmh/src/java/org/apache/lucene/benchmark/jmh/BinaryDocValuesBulkDecodeBenchmark.java b/...mark-jmh/src/java/org/apache/lucene/benchmark/jmh/BinaryDocValuesBulkDecodeBenchmark.java
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.benchmark.jmh;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Comparator;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Stream;
+import org.apache.lucene.document.BinaryDocValuesField;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.MMapDirectory;
+import org.apache.lucene.util.BytesRef;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+
+/**
+ * Benchmarks bulk retrieval of dense binary doc values via {@link BinaryDocValues#binaryValues}.
+ * Compares the per-doc default with the Lucene90 codec override that reads directly from the data
+ * slice.
+ *
+ * <p>Every invocation reads one batch of consecutive doc IDs. Once fewer than {@code batchSize}
+ * docs remain, reading restarts at doc 0 on a freshly opened iterator, because doc values
+ * iterators may only be advanced forward.
+ */
+@State(Scope.Thread)
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.SECONDS)
+@Warmup(iterations = 3, time = 2)
+@Measurement(iterations = 5, time = 2)
+public class BinaryDocValuesBulkDecodeBenchmark {
+
+  private static final String FIELD = "field";
+
+  private Directory dir;
+  private DirectoryReader reader;
+  private BinaryDocValues values;
+  private Path path;
+  private int[] docs;
+  private BytesRef[] valueBuffer;
+  // first doc ID of the batch that the next invocation will read
+  private int nextStart;
+
+  @Param({"1000000"})
+  public int docCount;
+
+  @Param({"8", "32", "128"})
+  public int valueLength;
+
+  @Param({"128", "1024"})
+  public int batchSize;
+
+  @Param({"fixed", "variable"})
+  public String encoding;
+
+  /** Builds a single-segment index in which every document carries one binary doc value. */
+  @Setup(Level.Trial)
+  public void setup() throws Exception {
+    if (batchSize > docCount) {
+      // a batch larger than the index would make the read methods ask for non-existent doc IDs
+      throw new IllegalArgumentException(
+          "batchSize (" + batchSize + ") must not exceed docCount (" + docCount + ")");
+    }
+    path = Files.createTempDirectory("binaryDocValuesBulkDecode");
+    dir = MMapDirectory.open(path);
+
+    // try-with-resources so that the writer is released even when indexing fails
+    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
+      Random random = new Random(0);
+      final boolean fixedLength = encoding.equals("fixed");
+      for (int i = 0; i < docCount; i++) {
+        int len = fixedLength ? valueLength : 1 + random.nextInt(valueLength);
+        byte[] bytes = new byte[len];
+        random.nextBytes(bytes);
+        Document doc = new Document();
+        doc.add(new BinaryDocValuesField(FIELD, new BytesRef(bytes)));
+        writer.addDocument(doc);
+      }
+      writer.forceMerge(1);
+      reader = DirectoryReader.open(writer);
+    }
+
+    values = openValues();
+    docs = new int[batchSize];
+    valueBuffer = new BytesRef[batchSize];
+  }
+
+  /** Closes the reader and directory, then removes the temporary index directory. */
+  @TearDown(Level.Trial)
+  public void tearDown() throws Exception {
+    reader.close();
+    dir.close();
+    if (Files.exists(path)) {
+      // reverse order deletes children before their parent directory
+      try (Stream<Path> walk = Files.walk(path)) {
+        walk.sorted(Comparator.reverseOrder())
+            .forEach(
+                p -> {
+                  try {
+                    Files.delete(p);
+                  } catch (IOException _) {
+                    // best-effort cleanup of a temp directory; a leftover file is harmless
+                  }
+                });
+      }
+    }
+  }
+
+  @Benchmark
+  @Fork(
+      value = 1,
+      jvmArgsAppend = {"-Xmx2g", "-Xms2g", "-XX:+AlwaysPreTouch"})
+  public int binaryValuesBulk() throws IOException {
+    return readBatchBulk();
+  }
+
+  @Benchmark
+  @Fork(
+      value = 1,
+      jvmArgsAppend = {"-Xmx2g", "-Xms2g", "-XX:+AlwaysPreTouch"})
+  public int binaryValuesPerDoc() throws IOException {
+    return readBatchPerDoc();
+  }
+
+  /** Returns a fresh iterator over the benchmark field of the first (only) leaf. */
+  private BinaryDocValues openValues() throws IOException {
+    return reader.leaves().get(0).reader().getBinaryDocValues(FIELD);
+  }
+
+  /**
+   * Returns the first doc ID of the batch to read now and moves the cursor to the following batch.
+   * When fewer than {@code batchSize} docs remain the cursor wraps to doc 0 and the iterator is
+   * re-opened, since moving an existing iterator back to a smaller doc ID is not supported. The
+   * re-open happens once per pass over the index and is paid equally by both benchmarks.
+   */
+  private int nextBatchStart() throws IOException {
+    if (nextStart > docCount - batchSize) {
+      nextStart = 0;
+      values = openValues();
+    }
+    final int start = nextStart;
+    nextStart += batchSize;
+    return start;
+  }
+
+  /** Reads one batch through the bulk API and returns the summed value lengths. */
+  private int readBatchBulk() throws IOException {
+    final int start = nextBatchStart();
+    for (int i = 0; i < batchSize; i++) {
+      docs[i] = start + i;
+    }
+
+    values.binaryValues(batchSize, docs, valueBuffer);
+    int checksum = 0;
+    for (int i = 0; i < batchSize; i++) {
+      checksum += valueBuffer[i].length;
+    }
+    return checksum;
+  }
+
+  /** Reads one batch doc by doc, copying each value, and returns the summed value lengths. */
+  private int readBatchPerDoc() throws IOException {
+    final int start = nextBatchStart();
+    int checksum = 0;
+    for (int i = 0; i < batchSize; i++) {
+      values.advanceExact(start + i);
+      // copy the value so the work matches what the bulk API hands back to its caller
+      BytesRef ref = BytesRef.deepCopyOf(values.binaryValue());
+      checksum += ref.length;
+    }
+    return checksum;
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
@@ -1230,6 +1230,27 @@ public BytesRef binaryValue() throws IOException {
             bytesSlice.readBytes((long) doc * length, bytes.bytes, 0, length);
             return bytes;
           }
+
+          @Override
+          public void binaryValues(
+              int size, int[] docs, int docsOffset, BytesRef[] values, int valuesOffset)
+              throws IOException {
+            if (size == 0) {
+              return;
+            }
+            // All values of the batch share one array. multiplyExact rejects a batch whose byte
+            // count overflows an int instead of silently allocating a wrongly sized buffer.
+            final byte[] bulk = new byte[Math.multiplyExact(size, length)];
+            if (isContiguous(size, docs, docsOffset)) {
+              // Values sit at doc * length, so a contiguous run is fetched with a single read.
+              bytesSlice.readBytes((long) docs[docsOffset] * length, bulk, 0, bulk.length);
+            } else {
+              // Scattered docs: copy each value into its slot of the shared array.
+              for (int di = docsOffset, bi = 0, end = docsOffset + size; di < end; di++, bi++) {
+                bytesSlice.readBytes((long) docs[di] * length, bulk, bi * length, length);
+              }
+            }
+            // Hand out one slice of the shared array per requested doc.
+            for (int i = 0; i < size; i++) {
+              values[valuesOffset + i] = new BytesRef(bulk, i * length, length);
+            }
+            // Track the last doc of the batch as the current position.
+            doc = docs[docsOffset + size - 1];
+          }
         };
       } else {
         // variable length
@@ -1252,6 +1273,41 @@ public BytesRef binaryValue() throws IOException {
             bytesSlice.readBytes(startOffset, bytes.bytes, 0, bytes.length);
             return bytes;
           }
+
+          @Override
+          public void binaryValues(
+              int size, int[] docs, int docsOffset, BytesRef[] values, int valuesOffset)
+              throws IOException {
+            if (size == 0) {
+              return;
+            }
+            final int end = docsOffset + size;
+            if (isContiguous(size, docs, docsOffset)) {
+              // Fetch the byte range spanning the whole run with one read and hand out slices
+              // of that buffer.
+              final long firstStart = addresses.get(docs[docsOffset]);
+              final long lastEnd = addresses.get(docs[end - 1] + 1L);
+              // toIntExact fails loudly instead of silently truncating a run larger than 2GB.
+              final int totalBytes = Math.toIntExact(lastEnd - firstStart);
+              final byte[] bulk = new byte[totalBytes];
+              bytesSlice.readBytes(firstStart, bulk, 0, totalBytes);
+              for (int di = docsOffset, vi = valuesOffset; di < end; di++, vi++) {
+                // Decode the start address once and reuse it for both the offset and the length.
+                final long start = addresses.get(docs[di]);
+                final int len = (int) (addresses.get(docs[di] + 1L) - start);
+                values[vi] = new BytesRef(bulk, (int) (start - firstStart), len);
+              }
+            } else {
+              // Scattered docs: read every value into its own exactly-sized array.
+              for (int di = docsOffset, vi = valuesOffset; di < end; di++, vi++) {
+                final long start = addresses.get(docs[di]);
+                final int len = (int) (addresses.get(docs[di] + 1L) - start);
+                final byte[] b = new byte[len];
+                bytesSlice.readBytes(start, b, 0, len);
+                values[vi] = new BytesRef(b, 0, len);
+              }
+            }
+            // Track the last doc of the batch as the current position.
+            doc = docs[end - 1];
+          }
         };
       }
     } else {

diff --git a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValues.java
@@ -18,9 +18,11 @@
 package org.apache.lucene.index;
 
 import java.io.IOException;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.FieldExistsQuery;
 import org.apache.lucene.util.BytesRef;
 
-/** A per-document numeric value. */
+/** A per-document binary value. */
 public abstract class BinaryDocValues extends DocValuesIterator {
 
   /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
@@ -33,4 +35,76 @@ protected BinaryDocValues() {}
    * @return binary value
    */
   public abstract BytesRef binaryValue() throws IOException;
+
+  /**
+   * Bulk retrieval of binary doc values. This API helps reduce the performance impact of virtual
+   * function calls.
+   *
+   * <p>This API behaves as if implemented as below, which is the default implementation:
+   *
+   * <pre><code class="language-java">
+   * public void binaryValues(int size, int[] docs, BytesRef[] values) throws IOException {
+   *   for (int i = 0; i &lt; size; ++i) {
+   *     int doc = docs[i];
+   *     if (advanceExact(doc)) {
+   *       values[i] = BytesRef.deepCopyOf(binaryValue());
+   *     } else {
+   *       values[i] = null;
+   *     }
+   *   }
+   * }
+   * </code></pre>
+   *
+   * <p><b>NOTE</b>: The {@code docs} array is required to be sorted in ascending order with no
+   * duplicates.
+   *
+   * <p><b>NOTE</b>: Documents that don't have a value for this field will have their corresponding
+   * entry set to {@code null}. If you need to exclude documents that don't have a value, then you
+   * could apply a {@link FieldExistsQuery} as a {@link Occur#FILTER} clause. Another option is to
+   * fall back to using {@link #advanceExact} and {@link #binaryValue()} on ranges of doc IDs that
+   * may not be dense, e.g.
+   *
+   * <pre><code class="language-java">
+   * if (size &gt; 0 &amp;&amp; values.advanceExact(docs[0]) &amp;&amp; values.docIDRunEnd() &gt; docs[size - 1]) {
+   *   // use values#binaryValues to retrieve values
+   * } else {
+   *   // some docs may not have a value, use #advanceExact and #binaryValue
+   * }
+   * </code></pre>
+   *
+   * <p><b>NOTE</b>: Each returned {@link BytesRef} is owned by the caller and remains valid after
+   * subsequent calls. Implementations are free to back all values of one call by a single shared
+   * array, so a returned {@link BytesRef} may have a non-zero {@link BytesRef#offset}; always
+   * read it through its {@code offset} and {@code length}.
+   *
+   * <p>This overload reads {@code docs} and writes {@code values} starting at index 0; it
+   * delegates to {@link #binaryValues(int, int[], int, BytesRef[], int)} with both offsets set to
+   * zero.
+   *
+   * @param size the number of values to retrieve
+   * @param docs the buffer of doc IDs whose values should be looked up
+   * @param values the buffer of values to fill; entries are set to {@code null} when a document
+   *     doesn't have a value
+   */
+  public void binaryValues(int size, int[] docs, BytesRef[] values) throws IOException {
+    binaryValues(size, docs, 0, values, 0);
+  }
+
+  /**
+   * Same as {@link #binaryValues(int, int[], BytesRef[])} but operating on sub-ranges of both
+   * arrays: {@code size} doc IDs are taken from {@code docs} beginning at {@code docsOffset}, and
+   * the results are stored in {@code values} beginning at {@code valuesOffset}. The argument
+   * layout mirrors {@link System#arraycopy}.
+   *
+   * @param size the number of values to retrieve
+   * @param docs the buffer of doc IDs whose values should be looked up
+   * @param docsOffset first position in {@code docs} to read
+   * @param values the buffer of values to fill; entries are set to {@code null} when a document
+   *     doesn't have a value
+   * @param valuesOffset first position in {@code values} to write
+   */
+  public void binaryValues(
+      int size, int[] docs, int docsOffset, BytesRef[] values, int valuesOffset)
+      throws IOException {
+    final int docsEnd = docsOffset + size;
+    int dst = valuesOffset;
+    for (int src = docsOffset; src < docsEnd; src++) {
+      // a doc without a value yields null; a present value is copied so the caller owns it
+      values[dst++] = advanceExact(docs[src]) ? BytesRef.deepCopyOf(binaryValue()) : null;
+    }
+  }
 }