From 44143789833d7339f04e8fac27e78af9d1d89881 Mon Sep 17 00:00:00 2001 From: selmanozleyen Date: Mon, 15 Jun 2026 15:51:00 +0200 Subject: [PATCH 1/3] init --- src/annbatch/loader.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/annbatch/loader.py b/src/annbatch/loader.py index 2286c761..1307c295 100644 --- a/src/annbatch/loader.py +++ b/src/annbatch/loader.py @@ -998,6 +998,9 @@ def __iter__( in_memory_indices: None | np.ndarray = self._maybe_accumulate_indices(dataset_index_to_rows) for split in splits: sel = inv[split] + if len(sel) > 0 and np.all(np.diff(sel) == 1): + sel = slice(sel[0], sel[-1] + 1) + data = in_memory_data[sel] yield { "X": data if not self._to_torch else to_torch(data, self._preload_to_gpu), From b32c2c9116f6c7220ff64be503809f46064e948d Mon Sep 17 00:00:00 2001 From: selmanozleyen Date: Mon, 15 Jun 2026 16:00:14 +0200 Subject: [PATCH 2/3] add one more condition to shortcircuit --- src/annbatch/loader.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/annbatch/loader.py b/src/annbatch/loader.py index 1307c295..a7a11370 100644 --- a/src/annbatch/loader.py +++ b/src/annbatch/loader.py @@ -998,7 +998,9 @@ def __iter__( in_memory_indices: None | np.ndarray = self._maybe_accumulate_indices(dataset_index_to_rows) for split in splits: sel = inv[split] - if len(sel) > 0 and np.all(np.diff(sel) == 1): + + # Use basic slicing for contiguous selections to avoid costly fancy indexing on the loaded memory + if len(sel) > 0 and ((sel[-1] - sel[0] == len(sel) - 1 and np.all(np.diff(sel) == 1))): sel = slice(sel[0], sel[-1] + 1) data = in_memory_data[sel] From ed13ead4711e1fe353396ed4d430169cca9c30c7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Jun 2026 14:01:53 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/annbatch/loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/annbatch/loader.py b/src/annbatch/loader.py index a7a11370..d5c85dbb 100644 --- a/src/annbatch/loader.py +++ b/src/annbatch/loader.py @@ -1000,7 +1000,7 @@ def __iter__( sel = inv[split] # Use basic slicing for contiguous selections to avoid costly fancy indexing on the loaded memory - if len(sel) > 0 and ((sel[-1] - sel[0] == len(sel) - 1 and np.all(np.diff(sel) == 1))): + if len(sel) > 0 and (sel[-1] - sel[0] == len(sel) - 1 and np.all(np.diff(sel) == 1)): sel = slice(sel[0], sel[-1] + 1) data = in_memory_data[sel]