From 4b12e525436584547d7df2e75195e30998290c8c Mon Sep 17 00:00:00 2001
From: liuweisong
Date: Tue, 16 Jun 2026 15:40:41 +0000
Subject: [PATCH] fix(fully-async): respect partial_rollout=False when requeuing ABORTED groups

In the standard path, sglang_rollout.py::abort() discards ABORTED
samples when partial_rollout=False. _AsyncRolloutWorker._make_done_cb
previously requeued ABORTED groups unconditionally, bypassing this
contract and causing tokens generated under stale weights to enter
training without off-policy masking.

Guard the requeue with the same partial_rollout flag used in abort().
---
 slime/rollout/fully_async_rollout.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/slime/rollout/fully_async_rollout.py b/slime/rollout/fully_async_rollout.py
index c301075c5c..63f8f1dfa8 100644
--- a/slime/rollout/fully_async_rollout.py
+++ b/slime/rollout/fully_async_rollout.py
@@ -179,12 +179,14 @@ def _cb(done_task: asyncio.Task) -> None:
                     type(result).__name__,
                 )
                 return
-            # Aborted group → requeue, don't ship to training.
+            # Aborted group → requeue only if partial_rollout is enabled,
+            # matching the semantics of sglang_rollout.py::abort().
             if any(getattr(s, "status", None) == Sample.Status.ABORTED for s in result):
-                try:
-                    self.data_buffer.add_samples([result])
-                except Exception:  # noqa: BLE001
-                    logger.exception("fully-async: failed to requeue aborted group")
+                if self.args.partial_rollout:
+                    try:
+                        self.data_buffer.add_samples([result])
+                    except Exception:  # noqa: BLE001
+                        logger.exception("fully-async: failed to requeue aborted group")
                 return
 
             self.output_queue.put((gid, result))