Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
Expand Down Expand Up @@ -391,6 +392,11 @@ public void checkIntegrity() throws IOException {
delegateFieldsProducer.checkIntegrity();
}

@Override
public void checkIntegrity(MergePolicy.OneMerge merge) throws IOException {
delegateFieldsProducer.checkIntegrity(merge);
}

@Override
public String toString() {
return getClass().getSimpleName()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
Expand Down Expand Up @@ -153,6 +154,12 @@ public void checkIntegrity() throws IOException {
// otherwise returned the raw postings reader
}

@Override
public void checkIntegrity(MergePolicy.OneMerge merge) throws IOException {
// if we read entirely into ram, we already validated.
// otherwise returned the raw postings reader
}

@Override
public String toString() {
return getClass().getSimpleName() + "(fields=" + fields.size() + ")";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.index.EmptyDocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
Expand Down Expand Up @@ -77,6 +78,11 @@ public void checkIntegrity() throws IOException {
impl.checkIntegrity();
}

@Override
public void checkIntegrity(MergePolicy.OneMerge merge) throws IOException {
impl.checkIntegrity(merge);
}

@Override
public String toString() {
return getClass().getSimpleName() + "(" + impl + ")";
Expand Down
43 changes: 40 additions & 3 deletions lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFormatTooNewException;
import org.apache.lucene.index.IndexFormatTooOldException;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
Expand Down Expand Up @@ -597,26 +598,62 @@ private static void validateFooter(IndexInput in) throws IOException {
}
}

/**
* Number of bytes between consecutive merge abort checks during {@link
* #checksumEntireFile(IndexInput, MergePolicy.OneMerge)}. Files smaller than this are checksummed
* in one shot without checking for abort.
*/
private static final long ABORT_CHECK_INTERVAL = 1024 * 1024;

/**
* Clones the provided input, reads all bytes from the file, and calls {@link #checkFooter}
*
* <p>Note that this method may be slow, as it must process the entire file. If you just need to
* extract the checksum value, call {@link #retrieveChecksum}.
*/
public static long checksumEntireFile(IndexInput input) throws IOException {
return checksumEntireFile(input, null, Long.MAX_VALUE);
}

/**
* Like {@link #checksumEntireFile(IndexInput)}, but periodically checks whether the provided
* merge has been aborted. This avoids spending a long time checksumming a large file when the
* merge has already been cancelled.
*
* @param input the index input to checksum
* @param merge the merge to check for abort, or {@code null} to behave like {@link
* #checksumEntireFile(IndexInput)}
* @throws MergePolicy.MergeAbortedException if the merge is aborted during checksumming
*/
public static long checksumEntireFile(IndexInput input, MergePolicy.OneMerge merge)
throws IOException {
return checksumEntireFile(input, merge, ABORT_CHECK_INTERVAL);
}

static long checksumEntireFile(
IndexInput input, MergePolicy.OneMerge merge, long abortCheckInterval) throws IOException {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

great. maybe at some point this could be generalized into a built-in functional interface/predicate? I do see it as just "passing in an optional progress function".

I feel like the API would be cleaner, but when I looked more, it doesn't seem easy.

for now it seems passing OneMerge directly, like you have it, is probably the reasonable choice. The MergeAbortedException is not "normal" and gets handled in a special way by IndexWriter.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for your feedback. Having tried various approach previously, I also think passing OneMerge is the simpler for now, but happy to revisit this if you change your mind.

IndexInput clone = input.clone();
clone.seek(0);
ChecksumIndexInput in = new BufferedChecksumIndexInput(clone);
assert in.getFilePointer() == 0;
if (in.length() < footerLength()) {
final long len = in.length();
if (len < footerLength()) {
throw new CorruptIndexException(
"misplaced codec footer (file truncated?): length="
+ in.length()
+ len
+ " but footerLength=="
+ footerLength(),
input);
}
in.seek(in.length() - footerLength());
final long target = len - footerLength();
if (merge == null || target <= abortCheckInterval) {
in.seek(target);
} else {
while (in.getFilePointer() < target) {
in.seek(Math.min(in.getFilePointer() + abortCheckInterval, target));
merge.checkAborted();
}
}
return checkFooter(in);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ public void merge(MergeState mergeState) throws IOException {
for (DocValuesProducer docValuesProducer : mergeState.docValuesProducers) {
if (docValuesProducer != null) {
mergeState.checkAborted();
docValuesProducer.checkIntegrity();
docValuesProducer.checkIntegrity(mergeState.oneMerge);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.index.DocValuesSkipper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
Expand Down Expand Up @@ -92,6 +93,20 @@ protected DocValuesProducer() {}
*/
public abstract void checkIntegrity() throws IOException;

/**
* Checks consistency of this producer, periodically checking if the provided merge has been
* aborted. Subclasses should override this to propagate the abort check into expensive checksum
* computations.
*
* <p>The default implementation delegates to {@link #checkIntegrity()}.
*
* @param merge the merge to check for abort, or {@code null} for non-interruptible behavior
* @lucene.internal
*/
public void checkIntegrity(MergePolicy.OneMerge merge) throws IOException {
checkIntegrity();
}

/**
* Returns an instance optimized for merging. This instance may only be consumed in the thread
* that called {@link #getMergeInstance()}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ public void merge(MergeState mergeState, NormsProducer norms) throws IOException
final int maxDoc = mergeState.maxDocs[readerIndex];
if (f != null) {
mergeState.checkAborted();
f.checkIntegrity();
f.checkIntegrity(mergeState.oneMerge);
slices.add(new ReaderSlice(docBase, maxDoc, readerIndex));
fields.add(f);
}
Expand Down
15 changes: 15 additions & 0 deletions lucene/core/src/java/org/apache/lucene/codecs/FieldsProducer.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.MergePolicy;

/**
* Abstract API that produces terms, doc, freq, prox, offset and payloads postings.
Expand All @@ -42,6 +43,20 @@ protected FieldsProducer() {}
*/
public abstract void checkIntegrity() throws IOException;

/**
* Checks consistency of this reader, periodically checking if the provided merge has been
* aborted. Subclasses should override this to propagate the abort check into expensive checksum
* computations.
*
* <p>The default implementation delegates to {@link #checkIntegrity()}.
*
* @param merge the merge to check for abort, or {@code null} for non-interruptible behavior
* @lucene.internal
*/
public void checkIntegrity(MergePolicy.OneMerge merge) throws IOException {
checkIntegrity();
}

/**
* Returns an instance optimized for merging. This instance may only be consumed in the thread
* that called {@link #getMergeInstance()}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.search.AcceptDocs;
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.search.ScoreDoc;
Expand All @@ -49,6 +50,20 @@ protected KnnVectorsReader() {}
*/
public abstract void checkIntegrity() throws IOException;

/**
* Checks consistency of this reader, periodically checking if the provided merge has been
* aborted. Subclasses should override this to propagate the abort check into expensive checksum
* computations.
*
* <p>The default implementation delegates to {@link #checkIntegrity()}.
*
* @param merge the merge to check for abort, or {@code null} for non-interruptible behavior
* @lucene.internal
*/
public void checkIntegrity(MergePolicy.OneMerge merge) throws IOException {
checkIntegrity();
}

/**
* If this reader wraps another for {@code field}, return the underlying reader, else return
* {@code this}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ public final void merge(MergeState mergeState) throws IOException {
assert reader != null || mergeState.fieldInfos[i].hasVectorValues() == false;
if (reader != null) {
mergeState.checkAborted();
reader.checkIntegrity();
reader.checkIntegrity(mergeState.oneMerge);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public void merge(MergeState mergeState) throws IOException {
for (NormsProducer normsProducer : mergeState.normsProducers) {
if (normsProducer != null) {
mergeState.checkAborted();
normsProducer.checkIntegrity();
normsProducer.checkIntegrity(mergeState.oneMerge);
}
}
for (FieldInfo mergeFieldInfo : mergeState.mergeFieldInfos) {
Expand Down
15 changes: 15 additions & 0 deletions lucene/core/src/java/org/apache/lucene/codecs/NormsProducer.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.NumericDocValues;

/**
Expand Down Expand Up @@ -48,6 +49,20 @@ protected NormsProducer() {}
*/
public abstract void checkIntegrity() throws IOException;

/**
* Checks consistency of this producer, periodically checking if the provided merge has been
* aborted. Subclasses should override this to propagate the abort check into expensive checksum
* computations.
*
* <p>The default implementation delegates to {@link #checkIntegrity()}.
*
* @param merge the merge to check for abort, or {@code null} for non-interruptible behavior
* @lucene.internal
*/
public void checkIntegrity(MergePolicy.OneMerge merge) throws IOException {
checkIntegrity();
}

/**
* Returns an instance optimized for merging. This instance may only be used from the thread that
* acquires it.
Expand Down
15 changes: 15 additions & 0 deletions lucene/core/src/java/org/apache/lucene/codecs/PointsReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.PointValues;

/**
Expand All @@ -41,6 +42,20 @@ protected PointsReader() {}
*/
public abstract void checkIntegrity() throws IOException;

/**
* Checks consistency of this reader, periodically checking if the provided merge has been
* aborted. Subclasses should override this to propagate the abort check into expensive checksum
* computations.
*
* <p>The default implementation delegates to {@link #checkIntegrity()}.
*
* @param merge the merge to check for abort, or {@code null} for non-interruptible behavior
* @lucene.internal
*/
public void checkIntegrity(MergePolicy.OneMerge merge) throws IOException {
checkIntegrity();
}

/**
* Return {@link PointValues} for the given {@code field}. The behavior is undefined if the given
* field doesn't have points enabled on its {@link FieldInfo}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ public void merge(MergeState mergeState) throws IOException {
for (PointsReader reader : mergeState.pointsReaders) {
if (reader != null) {
mergeState.checkAborted();
reader.checkIntegrity();
reader.checkIntegrity(mergeState.oneMerge);
}
}
// merge field at a time
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.DataInput;
Expand Down Expand Up @@ -84,6 +85,20 @@ public abstract ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, i
*/
public abstract void checkIntegrity() throws IOException;

/**
* Checks consistency of this reader, periodically checking if the provided merge has been
* aborted. Subclasses should override this to propagate the abort check into expensive checksum
* computations.
*
* <p>The default implementation delegates to {@link #checkIntegrity()}.
*
* @param merge the merge to check for abort, or {@code null} for non-interruptible behavior
* @lucene.internal
*/
public void checkIntegrity(MergePolicy.OneMerge merge) throws IOException {
checkIntegrity();
}

@Override
public abstract void close() throws IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.StoredFields;

Expand Down Expand Up @@ -47,6 +48,20 @@ protected StoredFieldsReader() {}
*/
public abstract void checkIntegrity() throws IOException;

/**
* Checks consistency of this reader, periodically checking if the provided merge has been
* aborted. Subclasses should override this to propagate the abort check into expensive checksum
* computations.
*
* <p>The default implementation delegates to {@link #checkIntegrity()}.
*
* @param merge the merge to check for abort, or {@code null} for non-interruptible behavior
* @lucene.internal
*/
public void checkIntegrity(MergePolicy.OneMerge merge) throws IOException {
checkIntegrity();
}

/**
* Returns an instance optimized for merging. This instance may not be cloned.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ public int merge(MergeState mergeState) throws IOException {
for (int i = 0; i < mergeState.storedFieldsReaders.length; i++) {
StoredFieldsReader storedFieldsReader = mergeState.storedFieldsReaders[i];
mergeState.checkAborted();
storedFieldsReader.checkIntegrity();
storedFieldsReader.checkIntegrity(mergeState.oneMerge);
subs.add(
new StoredFieldsMergeSub(
new MergeVisitor(mergeState, i),
Expand Down
Loading
Loading