-
Notifications
You must be signed in to change notification settings - Fork 7.7k
[Data] [Core] [3/n] Wire BlockRefCounter into Operators #64191
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,72 @@ | ||
| import threading | ||
| from collections import defaultdict | ||
| from typing import Dict | ||
|
|
||
| import ray | ||
| import ray._private.worker | ||
|
|
||
|
|
||
| class BlockRefCounter: | ||
| """Tracks object-store memory usage per operator via Ray Core callbacks. | ||
|
|
||
| The callback fires when: | ||
| - All Python ObjectRefs wrapping the block's ObjectID are garbage-collected, AND | ||
| - All Ray tasks that received the block as an argument have completed. | ||
| """ | ||
|
|
||
| def __init__(self): | ||
| # Object ID binaries of currently live blocks; used by _on_object_freed | ||
| # to distinguish a racing clear() from a real callback. | ||
| self._registered_ids: set[bytes] = set() | ||
| # (producer_id -> total live bytes); maintained incrementally for O(1) reads. | ||
| self._bytes_by_producer: Dict[str, int] = defaultdict(int) | ||
| self._lock = threading.Lock() | ||
|
|
||
| def on_block_produced( | ||
| self, | ||
| block_ref: "ray.ObjectRef", | ||
| size_bytes: int, | ||
| producer_id: str, | ||
| ) -> None: | ||
| """Register a block and attribute its memory to producer_id. | ||
|
|
||
| Registers a Ray Core out-of-scope callback so that when all references | ||
| to block_ref are gone the bytes are automatically removed from the | ||
| producer's usage. | ||
| """ | ||
| id_binary = block_ref.binary() | ||
|
rayhhome marked this conversation as resolved.
|
||
| with self._lock: | ||
| self._registered_ids.add(id_binary) | ||
| self._bytes_by_producer[producer_id] += size_bytes | ||
|
|
||
|
rayhhome marked this conversation as resolved.
|
||
| def _on_object_freed(id_bytes: bytes) -> None: | ||
| with self._lock: | ||
| if id_bytes not in self._registered_ids: | ||
| # Already cleared (e.g. by clear()), nothing to do. | ||
| return | ||
| self._registered_ids.discard(id_bytes) | ||
| self._bytes_by_producer[producer_id] -= size_bytes | ||
| if self._bytes_by_producer[producer_id] == 0: | ||
| del self._bytes_by_producer[producer_id] | ||
|
rayhhome marked this conversation as resolved.
|
||
|
|
||
| core_worker = ray._private.worker.global_worker.core_worker # type: ignore[attr-defined] | ||
| registered = core_worker.add_object_out_of_scope_callback( | ||
| block_ref, _on_object_freed | ||
| ) | ||
| if not registered: | ||
| _on_object_freed(id_binary) | ||
|
|
||
| def get_object_store_memory_usage(self, producer_id: str) -> int: | ||
| """Total bytes of live blocks attributed to producer_id.""" | ||
| with self._lock: | ||
| return self._bytes_by_producer.get(producer_id, 0) | ||
|
|
||
| def clear(self) -> None: | ||
| """Reset all accounting, e.g. on executor shutdown. | ||
|
|
||
| Any previously registered Ray Core callbacks firing after clear() | ||
| will be silently ignored because _registered_ids is empty. | ||
| """ | ||
| with self._lock: | ||
| self._registered_ids.clear() | ||
| self._bytes_by_producer.clear() | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -24,6 +24,7 @@ | |
| ActorPoolInfo, | ||
| AutoscalingActorPool, | ||
| ) | ||
| from ray.data._internal.execution.block_ref_counter import BlockRefCounter | ||
| from ray.data._internal.execution.interfaces.execution_options import ( | ||
| ExecutionOptions, | ||
| ExecutionResources, | ||
|
|
@@ -134,6 +135,8 @@ def __init__( | |
| self, | ||
| task_index: int, | ||
| streaming_gen: ObjectRefGenerator, | ||
| block_ref_counter: BlockRefCounter, | ||
| producer_id: str, | ||
|
Comment on lines
+138
to
+139
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. An alternative implementation could be to make callers responsible for updating the reference counter with Advantage is that it would reduce the amount we pass through Will defer you to about what's cleaner |
||
| output_ready_callback: Callable[[RefBundle], None] = lambda bundle: None, | ||
| task_done_callback: TaskDoneCallbackType = lambda exc, worker_stats, driver_stats: None, | ||
|
rayhhome marked this conversation as resolved.
|
||
| block_ready_callback: Callable[ | ||
|
|
@@ -149,6 +152,9 @@ def __init__( | |
| Args: | ||
| task_index: Index of the task. Used for callbacks. | ||
| streaming_gen: The streaming generator of this task. It should yield blocks. | ||
| block_ref_counter: The centralized block reference counter. on_block_produced | ||
| is called for each block yielded by this task. | ||
| producer_id: The id of the operator that produces the blocks from this task. | ||
| output_ready_callback: The callback to call when a new RefBundle is output | ||
| from the generator. | ||
| task_done_callback: The callback to call when the task is done. | ||
|
|
@@ -171,6 +177,8 @@ def __init__( | |
| self._block_ready_callback = block_ready_callback | ||
| self._metadata_ready_callback = metadata_ready_callback | ||
| self._operator_name = operator_name | ||
| self._block_ref_counter: BlockRefCounter = block_ref_counter | ||
| self._producer_id: str = producer_id | ||
|
|
||
| # If the generator hasn't produced block metadata yet, or if the block metadata | ||
| # object isn't available after we get a reference, we need store the pending | ||
|
|
@@ -292,6 +300,9 @@ def on_data_ready(self, max_bytes_to_read: Optional[int]) -> int: | |
| meta_with_schema_bytes | ||
| ) | ||
| meta = meta_with_schema.metadata | ||
| self._block_ref_counter.on_block_produced( | ||
| self._pending_block_ref, meta.size_bytes or 0, self._producer_id | ||
| ) | ||
| self._output_ready_callback( | ||
| RefBundle( | ||
| [BlockEntry(self._pending_block_ref, meta)], | ||
|
|
@@ -444,6 +455,7 @@ def __init__( | |
| self._id = str(uuid.uuid4()) | ||
| # Initialize metrics after data_context is set | ||
| self._metrics = OpRuntimeMetrics(self) | ||
| self._block_ref_counter: Optional[BlockRefCounter] = None | ||
|
rayhhome marked this conversation as resolved.
|
||
|
|
||
| def __reduce__(self): | ||
| raise ValueError("Operator is not serializable.") | ||
|
|
@@ -743,12 +755,21 @@ def num_output_splits(self) -> int: | |
| """ | ||
| return self._num_output_splits | ||
|
|
||
| def start(self, options: ExecutionOptions) -> None: | ||
| def start( | ||
| self, | ||
| options: ExecutionOptions, | ||
| block_ref_counter: Optional[BlockRefCounter] = None, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar comment -- can this ever actually be Making this just |
||
| ) -> None: | ||
| """Called by the executor when execution starts for an operator. | ||
|
|
||
| Args: | ||
| options: The global options used for the overall execution. | ||
| block_ref_counter: The executor-wide shared counter for tracking | ||
| object-store memory. If omitted, a fresh per-operator counter is used. | ||
| """ | ||
| self._block_ref_counter = ( | ||
| block_ref_counter if block_ref_counter is not None else BlockRefCounter() | ||
| ) | ||
|
rayhhome marked this conversation as resolved.
Outdated
rayhhome marked this conversation as resolved.
Outdated
|
||
| self._started = True | ||
|
|
||
| def can_add_input(self) -> bool: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -183,9 +183,23 @@ def all_inputs_done(self) -> None: | |
| ) | ||
| # NOTE: We don't account object store memory use from intermediate `bulk_fn` | ||
| # outputs (e.g., map outputs for map-reduce). | ||
| output_buffer, self._stats = self._bulk_fn(self._input_buffer.to_list(), ctx) | ||
|
|
||
| # Snapshot input refs before calling bulk_fn. Some bulk_fns (e.g. | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it make sense/be simpler if we made |
||
| # randomize_blocks) forward input ObjectRefs unchanged to the output. | ||
| # We only call on_block_produced for genuinely new refs to avoid | ||
| # double-counting; forwarded refs stay attributed to their original producer. | ||
| input_bundles = self._input_buffer.to_list() | ||
| input_refs = {entry.ref for bundle in input_bundles for entry in bundle.blocks} | ||
| output_buffer, self._stats = self._bulk_fn(input_bundles, ctx) | ||
| self._output_buffer = FIFOBundleQueue(output_buffer) | ||
|
|
||
| for bundle in output_buffer: | ||
| for entry in bundle.blocks: | ||
| if entry.ref not in input_refs: | ||
| self._block_ref_counter.on_block_produced( | ||
| entry.ref, entry.metadata.size_bytes or 0, self.id | ||
| ) | ||
|
|
||
| while self._input_buffer.has_next(): | ||
| refs = self._input_buffer.get_next() | ||
| self._metrics.on_input_dequeued(refs, input_index=0) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,6 +8,7 @@ | |
|
|
||
| from ray._common.utils import env_bool, env_float | ||
| from ray.data._internal.execution import create_resource_allocator | ||
| from ray.data._internal.execution.block_ref_counter import BlockRefCounter | ||
| from ray.data._internal.execution.interfaces.execution_options import ( | ||
| ExecutionOptions, | ||
| ExecutionResources, | ||
|
|
@@ -137,6 +138,8 @@ def __init__( | |
| # operator's output usage. | ||
| self._output_operator = terminal_operator_from_topology(topology) | ||
|
|
||
| self._block_ref_counter = BlockRefCounter() | ||
|
|
||
| self._op_resource_allocator: Optional[ | ||
| "OpResourceAllocator" | ||
| ] = create_resource_allocator(self, data_context) | ||
|
|
@@ -168,6 +171,11 @@ def get_external_consumer_bytes(self) -> int: | |
| """Get the bytes buffered by external consumers.""" | ||
| return self._external_consumer_bytes | ||
|
|
||
| @property | ||
| def block_ref_counter(self) -> BlockRefCounter: | ||
| """The centralized block reference counter for this executor.""" | ||
| return self._block_ref_counter | ||
|
|
||
|
Comment on lines
+174
to
+178
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can probably simplify the dataflow here.
Currently, it's like:
I think it'd be clearer as
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we do this, we can also avoid expanding the |
||
| def _estimate_object_store_memory_usage( | ||
| self, op: "PhysicalOperator", state: "OpState" | ||
| ) -> int: | ||
|
|
||


Uh oh!
There was an error while loading. Please reload this page.