diff --git a/src/datachain/catalog/catalog.py b/src/datachain/catalog/catalog.py index f8708ae78..ef37ebcbf 100644 --- a/src/datachain/catalog/catalog.py +++ b/src/datachain/catalog/catalog.py @@ -1155,7 +1155,7 @@ def cleanup_dataset_versions(self, job_id: str | None = None) -> int: Clean up dataset versions that are no longer needed. Removes dataset versions that: - - Have status CREATED, FAILED, STALE, or REMOVING + - Have status CREATED, FAILED, or REMOVING - Belong to completed/failed/canceled jobs (not running) - Are session_* datasets from finished jobs (orphaned intermediates) diff --git a/src/datachain/data_storage/metastore.py b/src/datachain/data_storage/metastore.py index 1abd99007..f0b405ea6 100644 --- a/src/datachain/data_storage/metastore.py +++ b/src/datachain/data_storage/metastore.py @@ -343,7 +343,7 @@ def get_dataset_versions_to_clean( job (used during job failure cleanup). When job_id is None, returns all versions that are safe to delete: - - Status CREATED, FAILED, STALE where either: + - Status CREATED, FAILED where either: - the associated job has finished, or - there is no associated job (job_id is NULL) and the version is older than STALE_CREATED_THRESHOLD_HOURS @@ -1831,13 +1831,12 @@ def get_dataset_versions_to_clean( ) .where( or_( - # Incomplete/failed/stale versions from finished jobs + # Incomplete/failed versions from finished jobs and_( dv.c.status.in_( [ DatasetStatus.CREATED, DatasetStatus.FAILED, - DatasetStatus.STALE, DatasetStatus.REMOVING, ] ), diff --git a/src/datachain/dataset.py b/src/datachain/dataset.py index cd825d054..55aaa7de5 100644 --- a/src/datachain/dataset.py +++ b/src/datachain/dataset.py @@ -260,10 +260,8 @@ def __hash__(self): class DatasetStatus: CREATED = 1 - PENDING = 2 FAILED = 3 COMPLETE = 4 - STALE = 6 REMOVING = 7 @@ -371,7 +369,6 @@ def is_final_status(self) -> bool: return self.status in [ DatasetStatus.FAILED, DatasetStatus.COMPLETE, - DatasetStatus.STALE, DatasetStatus.REMOVING, ]