From e83ae51d5fe3b53cef663334f0b4fcf75e2d3264 Mon Sep 17 00:00:00 2001 From: ilongin Date: Tue, 9 Jun 2026 00:35:13 +0200 Subject: [PATCH] Removing obsolete dataset statuses --- src/datachain/catalog/catalog.py | 2 +- src/datachain/data_storage/metastore.py | 5 ++--- src/datachain/dataset.py | 3 --- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/datachain/catalog/catalog.py b/src/datachain/catalog/catalog.py index f3ce21d8c..7f0c8e471 100644 --- a/src/datachain/catalog/catalog.py +++ b/src/datachain/catalog/catalog.py @@ -1151,7 +1151,7 @@ def cleanup_dataset_versions(self, job_id: str | None = None) -> int: Clean up dataset versions that are no longer needed. Removes dataset versions that: - - Have status CREATED, FAILED, STALE, or REMOVING + - Have status CREATED, FAILED, or REMOVING - Belong to completed/failed/canceled jobs (not running) - Are session_* datasets from finished jobs (orphaned intermediates) diff --git a/src/datachain/data_storage/metastore.py b/src/datachain/data_storage/metastore.py index 1abd99007..f0b405ea6 100644 --- a/src/datachain/data_storage/metastore.py +++ b/src/datachain/data_storage/metastore.py @@ -343,7 +343,7 @@ def get_dataset_versions_to_clean( job (used during job failure cleanup). When job_id is None, returns all versions that are safe to delete: - - Status CREATED, FAILED, STALE where either: + - Status CREATED, FAILED where either: - the associated job has finished, or - there is no associated job (job_id is NULL) and the version is older than STALE_CREATED_THRESHOLD_HOURS @@ -1831,13 +1831,12 @@ def get_dataset_versions_to_clean( ) .where( or_( - # Incomplete/failed/stale versions from finished jobs + # Incomplete/failed versions from finished jobs and_( dv.c.status.in_( [ DatasetStatus.CREATED, DatasetStatus.FAILED, - DatasetStatus.STALE, DatasetStatus.REMOVING, ] ), diff --git a/src/datachain/dataset.py b/src/datachain/dataset.py index cd825d054..55aaa7de5 100644 --- a/src/datachain/dataset.py +++ b/src/datachain/dataset.py @@ -260,10 +260,8 @@ def __hash__(self): class DatasetStatus: CREATED = 1 - PENDING = 2 FAILED = 3 COMPLETE = 4 - STALE = 6 REMOVING = 7 @@ -371,7 +369,6 @@ def is_final_status(self) -> bool: return self.status in [ DatasetStatus.FAILED, DatasetStatus.COMPLETE, - DatasetStatus.STALE, DatasetStatus.REMOVING, ]