Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 59 additions & 32 deletions unsloth_zoo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,26 +92,34 @@ def has_429_exact_full_read(log_dir: str | Path) -> str:
from importlib.util import find_spec
if find_spec("unsloth") is None:
raise ImportError("Please install Unsloth via `pip install unsloth`!")
if find_spec("torch") is None:
raise ImportError(
"Unsloth: Pytorch is not installed. Go to https://pytorch.org/.\n"\
"We also have some installation instructions on our Github page."
)
_HAS_TORCH = find_spec("torch") is not None

# Keep original allocator settings to preserve explicit user config precedence.
_ORIGINAL_PYTORCH_CUDA_ALLOC_CONF = os.environ.get("PYTORCH_CUDA_ALLOC_CONF")
_ORIGINAL_PYTORCH_HIP_ALLOC_CONF = os.environ.get("PYTORCH_HIP_ALLOC_CONF")
_HAS_ORIGINAL_PYTORCH_ALLOC_CONF = "PYTORCH_ALLOC_CONF" in os.environ
if _HAS_TORCH:
_ORIGINAL_PYTORCH_CUDA_ALLOC_CONF = os.environ.get("PYTORCH_CUDA_ALLOC_CONF")
_ORIGINAL_PYTORCH_HIP_ALLOC_CONF = os.environ.get("PYTORCH_HIP_ALLOC_CONF")
_HAS_ORIGINAL_PYTORCH_ALLOC_CONF = "PYTORCH_ALLOC_CONF" in os.environ
else:
_ORIGINAL_PYTORCH_CUDA_ALLOC_CONF = None
_ORIGINAL_PYTORCH_HIP_ALLOC_CONF = None
_HAS_ORIGINAL_PYTORCH_ALLOC_CONF = False

# We support Pytorch 2
# Fixes https://github.com/unslothai/unsloth/issues/38
from importlib.metadata import version as importlib_version
torch_version_raw = str(importlib_version("torch"))
torch_version = str(re.match(r"[0-9\.]{3,}", torch_version_raw).group(0)).split(".")
major_torch, minor_torch = torch_version[0], torch_version[1]
major_torch, minor_torch = int(major_torch), int(minor_torch)
IS_TORCH_2_9_OR_NEWER = (major_torch > 2) or (major_torch == 2 and minor_torch >= 9)
IS_TORCH_ROCM_BUILD = "+rocm" in torch_version_raw.lower()
if _HAS_TORCH:
torch_version_raw = str(importlib_version("torch"))
torch_version = str(re.match(r"[0-9\.]{3,}", torch_version_raw).group(0)).split(".")
major_torch, minor_torch = torch_version[0], torch_version[1]
major_torch, minor_torch = int(major_torch), int(minor_torch)
IS_TORCH_2_9_OR_NEWER = (major_torch > 2) or (major_torch == 2 and minor_torch >= 9)
IS_TORCH_ROCM_BUILD = "+rocm" in torch_version_raw.lower()
else:
torch_version_raw = ""
torch_version = ["0", "0"]
major_torch, minor_torch = 0, 0
IS_TORCH_2_9_OR_NEWER = False
IS_TORCH_ROCM_BUILD = False

# Reduce VRAM usage by reducing fragmentation
# And optimize pinning of memory
Expand Down Expand Up @@ -175,28 +183,28 @@ def clean_expandable_segments_value(value):
return ",".join(parts) if len(parts) else None


if (major_torch < 2):
if _HAS_TORCH and (major_torch < 2):
Comment thread
LeoBorcherding marked this conversation as resolved.
raise ImportError("Unsloth only supports Pytorch 2 for now. Please update your Pytorch to 2.1.\n"\
"We have some installation instructions on our Github page.")
elif (major_torch == 2) and (minor_torch < 2):
elif _HAS_TORCH and (major_torch == 2) and (minor_torch < 2):
# Disable expandable_segments
delete_key("PYTORCH_CUDA_ALLOC_CONF")
delete_key("PYTORCH_HIP_ALLOC_CONF")
delete_key("PYTORCH_ALLOC_CONF")
elif bool(os.environ.get("WSL_DISTRO_NAME") or os.environ.get("WSL_INTEROP")):
elif _HAS_TORCH and bool(os.environ.get("WSL_DISTRO_NAME") or os.environ.get("WSL_INTEROP")):
# Expandable segments does NOT work on WSL
delete_key("PYTORCH_CUDA_ALLOC_CONF")
delete_key("PYTORCH_HIP_ALLOC_CONF")
delete_key("PYTORCH_ALLOC_CONF")
elif os.name == 'nt':
elif _HAS_TORCH and os.name == 'nt':
# Expandable segments does NOT work on Windows
delete_key("PYTORCH_CUDA_ALLOC_CONF")
delete_key("PYTORCH_HIP_ALLOC_CONF")
delete_key("PYTORCH_ALLOC_CONF")
Comment thread
LeoBorcherding marked this conversation as resolved.

# IMPORTANT: run ROCm cleanup before importing device_type (which imports torch).
# HIP allocator settings can be read during torch initialization.
if IS_TORCH_ROCM_BUILD:
if _HAS_TORCH and IS_TORCH_ROCM_BUILD:
remove_expandable_segments("PYTORCH_CUDA_ALLOC_CONF")
remove_expandable_segments("PYTORCH_HIP_ALLOC_CONF")
remove_expandable_segments("PYTORCH_ALLOC_CONF")
Expand All @@ -217,14 +225,26 @@ def filter(self, x): return not (self.text in x.getMessage())
del logging, torchao_logger, HideLoggingMessage

# Get device types and other variables
from .device_type import (
is_hip,
get_device_type,
DEVICE_TYPE,
DEVICE_TYPE_TORCH,
DEVICE_COUNT,
ALLOW_PREQUANTIZED_MODELS,
)
if _HAS_TORCH:
from .device_type import (
is_hip,
get_device_type,
DEVICE_TYPE,
DEVICE_TYPE_TORCH,
DEVICE_COUNT,
ALLOW_PREQUANTIZED_MODELS,
)
else:
def is_hip():
return False

def get_device_type():
return "cpu"

DEVICE_TYPE = "cpu"
DEVICE_TYPE_TORCH = "cpu"
DEVICE_COUNT = 1
ALLOW_PREQUANTIZED_MODELS = False
IS_HIP_RUNTIME = (DEVICE_TYPE == "hip") or bool(is_hip())

# Torch >= 2.9 uses PYTORCH_ALLOC_CONF and treats legacy per-backend vars as deprecated.
Expand Down Expand Up @@ -260,13 +280,13 @@ def filter(self, x): return not (self.text in x.getMessage())
remove_expandable_segments("PYTORCH_HIP_ALLOC_CONF")
remove_expandable_segments("PYTORCH_ALLOC_CONF")
delete_key("PYTORCH_CUDA_ALLOC_CONF")
elif DEVICE_TYPE == "cuda" and not IS_HIP_RUNTIME and not IS_TORCH_2_9_OR_NEWER:
elif _HAS_TORCH and DEVICE_TYPE == "cuda" and not IS_HIP_RUNTIME and not IS_TORCH_2_9_OR_NEWER:
delete_key("PYTORCH_HIP_ALLOC_CONF")
delete_key("PYTORCH_ALLOC_CONF")

# CCE fails on Torch 2.8 and above
# OutOfResources: out of resource: shared memory, Required: 98304, Hardware limit: 65536. Reducing block sizes or `num_stages`
if (major_torch >= 2 and minor_torch >= 8) or (major_torch > 2):
if _HAS_TORCH and ((major_torch >= 2 and minor_torch >= 8) or (major_torch > 2)):
os.environ["UNSLOTH_ENABLE_CCE"] = "0"
elif DEVICE_TYPE == "hip":
# CCE also fails in HIP / AMD
Expand All @@ -286,9 +306,15 @@ def filter(self, x): return not (self.text in x.getMessage())
# Log Unsloth-Zoo Utilities
os.environ["UNSLOTH_ZOO_IS_PRESENT"] = "1"

from .temporary_patches import (
encode_conversations_with_harmony,
)
if _HAS_TORCH:
from .temporary_patches import (
encode_conversations_with_harmony,
)
else:
def encode_conversations_with_harmony(*args, **kwargs):
raise ImportError(
"Unsloth: encode_conversations_with_harmony requires torch. Install torch to enable this feature."
)
from .rl_environments import (
check_python_modules,
create_locked_down_function,
Expand All @@ -310,4 +336,5 @@ def filter(self, x): return not (self.text in x.getMessage())
except:
pass

del _HAS_TORCH
del os, warnings, re
4 changes: 2 additions & 2 deletions unsloth_zoo/device_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ def get_device_type():
amd_hint = _amd_installation_hint()
if amd_hint is not None:
raise NotImplementedError(amd_hint)
raise NotImplementedError("Unsloth cannot find any torch accelerator? You need a GPU.")
return "cpu"
accelerator = str(torch.accelerator.current_accelerator())
if accelerator in ("cuda", "xpu", "hip"):
raise RuntimeError(
Expand All @@ -226,7 +226,7 @@ def get_device_type():
amd_hint = _amd_installation_hint()
if amd_hint is not None:
raise NotImplementedError(amd_hint)
raise NotImplementedError("Unsloth currently only works on NVIDIA, AMD and Intel GPUs.")
return "cpu"
pass
DEVICE_TYPE : str = get_device_type()
# HIP fails for autocast and other torch functions. Use CUDA instead
Expand Down
5 changes: 4 additions & 1 deletion unsloth_zoo/temporary_patches/gpt_oss.py
Original file line number Diff line number Diff line change
Expand Up @@ -1137,8 +1137,11 @@ def patch_gpt_oss_bnb4bit_auto():

if DEVICE_TYPE == "xpu":
device_memory = torch.xpu.memory.mem_get_info(0)[-1]
else:
elif DEVICE_TYPE in ("cuda", "hip") and hasattr(torch, "cuda") and torch.cuda.is_available():
device_memory = torch.cuda.memory.mem_get_info(0)[-1]
else:
# CPU-only and no-accelerator builds should not query CUDA memory.
device_memory = 0
use_combo_kernels = False if device_memory/1024/1024/1024 <= 40 else True
fused_torch_compile_options = get_torch_compile_options(
epilogue_fusion = True,
Expand Down
2 changes: 1 addition & 1 deletion unsloth_zoo/temporary_patches/moe_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def _check_grouped_gemm_available():
from unsloth.kernels.moe.grouped_gemm.interface import grouped_gemm, supports_tma
_GROUPED_GEMM_AVAILABLE = True
_init_triton_allocator()
except (ImportError, ModuleNotFoundError):
except Exception:
Comment thread
LeoBorcherding marked this conversation as resolved.
_GROUPED_GEMM_AVAILABLE = False
return _GROUPED_GEMM_AVAILABLE

Expand Down