unslothai · danielhanchen · May 24, 2026 · May 24, 2026 · May 24, 2026 · May 24, 2026
diff --git a/unsloth_zoo/llama_cpp.py b/unsloth_zoo/llama_cpp.py
@@ -278,7 +278,18 @@ def install_package(package, sudo = False, print_output = False, print_outputs =
 
     print(f"Unsloth: Installing packages: {package}")
     if not (IS_COLAB_ENVIRONMENT or IS_KAGGLE_ENVIRONMENT):
-        acceptance = input(f"Missing system packages. We need to execute `{install_cmd}` - do you accept? Press ENTER. Type NO if not.")
+        # Non-interactive contexts (Docker w/o TTY, headless CI) raise
+        # EOFError on input(). Treat that like an implicit ENTER ie accept
+        # the install. Opt out via UNSLOTH_AUTO_INSTALL=0.
+        try:
+            acceptance = input(f"Missing system packages. We need to execute `{install_cmd}` - do you accept? Press ENTER. Type NO if not.")
+        except EOFError:
+            if os.environ.get("UNSLOTH_AUTO_INSTALL", "1") != "1":
+                raise RuntimeError(
+                    f"Unsloth: Execution of `{install_cmd}` was cancelled (no TTY and UNSLOTH_AUTO_INSTALL=0)!\n"\
+                    "Please install llama.cpp manually via https://docs.unsloth.ai/basics/troubleshooting-and-faqs#how-do-i-manually-save-to-gguf"
+                )
+            acceptance = ""
         if "no" in str(acceptance).lower():
             raise RuntimeError(
                 f"Unsloth: Execution of `{install_cmd}` was cancelled!\n"\

diff --git a/unsloth_zoo/patching_utils.py b/unsloth_zoo/patching_utils.py
@@ -110,7 +110,11 @@ def patch_torch_compile(debug = False, O3 = False, ignore_errors = True):
     else:
         DEBUGGING = ""
         os.environ.pop("TORCHDYNAMO_VERBOSE", None)
-        os.environ.pop("TORCHINDUCTOR_COMPILE_THREADS", None)
+        # Preserve the single-worker forcing put in place by unsloth/_gpu_init
+        # to keep cgroup-pinned containers from spawning Inductor subprocess
+        # workers that can't see the GPU.
+        if os.environ.get("UNSLOTH_FORCE_SINGLE_COMPILE_WORKER", "0") != "1":
+            os.environ.pop("TORCHINDUCTOR_COMPILE_THREADS", None)
         os.environ.pop("TORCHINDUCTOR_FORCE_DISABLE_CACHES", None)
         os.environ.pop("TORCH_LOGS", None)
         torch._logging.set_logs(all = logging.CRITICAL)

@@ -16,6 +16,7 @@
 
 
 from .common import *
+from .notebook_deps import *
 from .gemma import *
 from .misc import *
 from .gemma3n import *

@@ -43,6 +43,12 @@ def determine_compile_threads():
     # See https://github.com/pytorch/pytorch/blob/ab2294d8289a7757a2fc321cdefac88e2b378edf/torch/_inductor/config.py#L771
     # Windows thread count = 1. See https://github.com/unslothai/unsloth-zoo/pull/187
     if sys.platform == "win32": return 1
+    # Honour the explicit single-worker forcing set by unsloth/_gpu_init for
+    # cgroup-pinned containers where the Inductor compile worker pool cannot
+    # see the GPU. Otherwise determine_compile_threads ignores the env var
+    # and the options dict still passes the multi-worker default.
+    if os.environ.get("TORCHINDUCTOR_COMPILE_THREADS") == "1":
+        return 1
     cpu_count = os.cpu_count()
     return min(32, max(4, cpu_count))
 pass

@@ -121,6 +121,19 @@ def _gemma3_call_impl(
             tokenizer_init_kwargs=self.tokenizer.init_kwargs,
             **kwargs,
         )
+        # TRL GRPO paged + reward paths call Gemma3Processor(text=[...]) with no
+        # padding= kwarg; upstream Gemma3ProcessorKwargs default is padding=False
+        # so ragged completions blow up BatchFeature tensor stacking. Force
+        # longest-padding only when caller did not pin padding AND we have >1
+        # text row (single-image inference is byte-identical).
+        _user_padding = kwargs.get("padding", None)
+        if _user_padding is None:
+            _user_padding = kwargs.get("text_kwargs", {}).get("padding", None)
+        _text_rows = (
+            len(text) if isinstance(text, (list, tuple)) and not isinstance(text, str) else 1
+        )
+        if _user_padding is None and _text_rows > 1:
+            output_kwargs["text_kwargs"]["padding"] = "longest"
 
         batched_images = None
         if images is not None:

@@ -1506,3 +1506,24 @@ def _min_pixels(self):
         pass
 pass
 TEMPORARY_PATCHES.append(patch_qwen2vl_image_processor_pixel_attrs)
+
+
+def patch_deepseek_v2_moe_capitalisation_alias():
+    """
+    Alias the legacy name `DeepseekV2MoE` to `DeepseekV2Moe` on transformers'
+    `modeling_deepseek_v2` module when only the latter is defined there.
+    Rationale (from the original patch notes): transformers 5.0 renamed the
+    class, while remote-code models such as deepseek-ai/DeepSeek-OCR still
+    import the old spelling and would otherwise fail with ImportError.
+    No-op when the module is not importable, when `DeepseekV2MoE` already
+    exists (e.g. older transformers), or when `DeepseekV2Moe` is absent.
+    """
+    try:
+        from transformers.models.deepseek_v2 import modeling_deepseek_v2 as modeling
+    except ImportError:
+        return
+    legacy_absent = not hasattr(modeling, "DeepseekV2MoE")
+    if legacy_absent and hasattr(modeling, "DeepseekV2Moe"):
+        setattr(modeling, "DeepseekV2MoE", modeling.DeepseekV2Moe)
+pass
+TEMPORARY_PATCHES.append(patch_deepseek_v2_moe_capitalisation_alias)
@@ -0,0 +1,224 @@
+# Auto-install missing notebook-only Python deps on first use.
+#
+# Four notebooks failed in the Blackwell docker validation because the slim
+# venv shipped without timm / traitlets / addict / matplotlib, and the
+# raising frame is buried inside HF code (`transformers.utils.import_utils.
+# requires_backends` for TimmWrapper, `transformers.dynamic_module_utils.
+# check_imports` for the Deepseek-OCR trust_remote_code modeling file, and
+# a bare ModuleNotFoundError for traitlets from the IPython chain). Wrap the
+# two transformers call sites with a retry that pip-installs the offending
+# package (allow-list only); pre-install traitlets at import. Honours the
+# existing `UNSLOTH_AUTO_INSTALL=0` opt-out (used by `llama_cpp.py`) and
+# the standard offline flags so air-gapped envs keep emitting the
+# upstream ImportError verbatim.
+
+import importlib
+import importlib.metadata
+import importlib.util
+import os
+import shutil
+import site
+import subprocess
+import sys
+
+from ..log import logger
+
+# Packages eligible for auto-install: pypi-name -> import-name (None means same, with "-" mapped to "_").
+_ALLOW_LIST = {
+    "timm":          None,           # vision backbones (TimmWrapperModel)
+    "addict":        None,           # Deepseek-OCR config dicts
+    "einops":        None,           # Deepseek-OCR deepencoder + many other vision models
+    "easydict":      None,           # Deepseek-OCR deepencoder.py:12 `from easydict import EasyDict`
+    "snac":          None,           # Orpheus TTS neural audio codec
+    "torchcodec":    None,           # HF datasets audio Feature decoder (>= datasets 4.x)
+    "matplotlib":    None,           # Deepseek-OCR + a few HF image utils
+    "traitlets":     None,           # Jupyter/IPython widget chain
+    "soundfile":     None,           # audio processors
+    "librosa":       None,           # audio processors
+    "scipy":         None,           # several processors
+    "pyctcdecode":   None,           # ASR
+    "tiktoken":      None,           # tokenizer remote-code paths
+    "blobfile":      None,           # tiktoken backing store
+    "pillow_heif":   "pillow_heif",  # HEIF images
+    "decord":        None,           # video processors
+    "av":            "av",           # pyav (video processors)
+    "num2words":     None,           # speech text norm
+    "jieba":         None,           # zh tokenizer
+    "sentencepiece": None,           # tokenizers
+}
+
+_AUTO_INSTALL = os.environ.get("UNSLOTH_AUTO_INSTALL", "1") == "1"  # read once at import; on unless the var is set to anything other than "1"
+_NO_NETWORK = (  # read once at import; True when any of these offline env vars equals "1"
+    os.environ.get("UNSLOTH_OFFLINE", "0") == "1"
+    or os.environ.get("HF_HUB_OFFLINE", "0") == "1"
+    or os.environ.get("TRANSFORMERS_OFFLINE", "0") == "1"
+)
+_attempted: set = set()  # package names _pip_install has already been asked to install in this process
+
+
+def _in_venv() -> bool:
+    """Return True if sys prefixes or VIRTUAL_ENV/CONDA_PREFIX signal an isolated env."""
+    if hasattr(sys, "real_prefix"):
+        return True
+    if getattr(sys, "base_prefix", sys.prefix) != sys.prefix:
+        return True
+    return any(os.environ.get(name) for name in ("VIRTUAL_ENV", "CONDA_PREFIX"))
+
+
+def _pip_install(pkg: str) -> bool:
+    """Try to install `pkg` (at most once per process); return True on success."""
+    if pkg in _attempted:
+        return False
+    _attempted.add(pkg)
+    if shutil.which("uv") and _in_venv():
+        # Pin uv to the running interpreter; otherwise it picks its target
+        # env from VIRTUAL_ENV or a nearby .venv, which may not be this one.
+        cmd = ["uv", "pip", "install", "--quiet", "--python", sys.executable, pkg]
+    else:
+        cmd = [sys.executable, "-m", "pip", "install", "--quiet"]
+        cmd += ["--disable-pip-version-check", "--no-input", pkg]
+        # Outside a venv as non-root (Windows has no geteuid): probe write
+        # access to site-packages and fall back to --user if that fails.
+        if not _in_venv() and hasattr(os, "geteuid") and os.geteuid() != 0:
+            try:
+                sp = site.getsitepackages()[0]
+                probe = os.path.join(sp, ".unsloth_write_probe")
+                open(probe, "w").close()
+                os.remove(probe)
+            except Exception:
+                cmd.append("--user")
+    logger.warning(
+        f"Unsloth: auto-installing missing notebook dep `{pkg}` via "
+        f"`{' '.join(cmd)}`. Set UNSLOTH_AUTO_INSTALL=0 to disable."
+    )
+    try:
+        r = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
+    except Exception as e:
+        logger.warning(f"Unsloth: auto-install of `{pkg}` failed to launch: {e}")
+        return False
+    if r.returncode != 0:
+        tail = (r.stderr or "")[-500:]
+        logger.warning(f"Unsloth: auto-install of `{pkg}` failed:\n{tail}")
+        return False
+    importlib.invalidate_caches()
+    try:
+        list(importlib.metadata.distributions())
+    except Exception:
+        pass
+    return True
+
+
+def _try_install_and_import(pkg: str) -> bool:
+    """Return True if allow-listed `pkg` is (or becomes, via pip) findable."""
+    if not (pkg in _ALLOW_LIST and _AUTO_INSTALL) or _NO_NETWORK:
+        return False
+    module_name = _ALLOW_LIST[pkg] or pkg.replace("-", "_")
+    locate = importlib.util.find_spec
+    if locate(module_name) is not None:
+        return True
+    # Not present yet: install once, then look the module up again.
+    installed = _pip_install(pkg)
+    return bool(installed) and locate(module_name) is not None
+
+
+def patch_requires_backends_autoinstall():
+    """
+    Replace ``transformers.utils.import_utils.requires_backends`` with a
+    wrapper that, on ImportError, tries to install any allow-listed
+    backends from the request and then calls the original once more.
+    The first ImportError is re-raised unchanged when auto-install is
+    off, no requested backend is allow-listed, or nothing got installed.
+    """
+    try:
+        from transformers.utils import import_utils as iu
+    except Exception:
+        return  # transformers absent (MLX-only path) -- nothing to patch.
+    original = iu.requires_backends
+    if getattr(original, "_unsloth_patched", False):
+        return
+
+    def requires_backends(obj, backends):
+        try:
+            return original(obj, backends)
+        except ImportError:
+            if _NO_NETWORK or not _AUTO_INSTALL:
+                raise
+            if isinstance(backends, (list, tuple)):
+                requested = backends
+            else:
+                requested = [backends]
+            wanted = [b for b in requested if isinstance(b, str) and b in _ALLOW_LIST]
+            # Attempt every candidate (no short-circuit) before judging success.
+            outcomes = [_try_install_and_import(b) for b in wanted]
+            if not any(outcomes):
+                raise
+            for b in wanted:
+                # Set the module's `_<name>_available` attribute when it exists.
+                flag = "_" + b.replace("-", "_") + "_available"
+                if hasattr(iu, flag):
+                    setattr(iu, flag, True)
+            return original(obj, backends)
+
+    requires_backends._unsloth_patched = True
+    iu.requires_backends = requires_backends
+
+
+def patch_check_imports_autoinstall():
+    """
+    Wrap ``transformers.dynamic_module_utils.check_imports`` (used for
+    trust_remote_code modeling files, e.g. Deepseek-OCR) so an ImportError
+    "This modeling file requires..." triggers auto-install of allow-listed
+    packages and one retry. In every other case the original ImportError
+    is re-raised unchanged. No-op if transformers cannot be imported.
+    """
+    try:
+        from transformers import dynamic_module_utils as dmu
+    except Exception:
+        return
+    if getattr(dmu.check_imports, "_unsloth_patched", False):
+        return
+    _orig = dmu.check_imports
+
+    def check_imports(filename):
+        try:
+            return _orig(filename)
+        except ImportError as e:
+            if not _AUTO_INSTALL or _NO_NETWORK:
+                raise
+            msg = str(e)
+            if "This modeling file requires" not in msg:
+                raise
+            # Format: "... environment: pkg1, pkg2. Run `pip install...`".
+            # partition() cannot raise, so `raise` re-raises the ImportError.
+            _, marker, tail = msg.partition("environment:")
+            if not marker:
+                raise
+            pkgs_str = tail.split(".", 1)[0]
+            pkgs = [p.strip() for p in pkgs_str.split(",") if p.strip() in _ALLOW_LIST]
+            if not pkgs:
+                raise
+            ok = all(_try_install_and_import(p) for p in pkgs)
+            if not ok:
+                raise
+            return _orig(filename)
+
+    check_imports._unsloth_patched = True
+    dmu.check_imports = check_imports
+
+
+def _ensure_notebook_chain():
+    """
+    Pre-emptively auto-install ``traitlets`` (Jupyter/IPython chain) when it
+    is missing; no-op if auto-install is disabled or an offline flag is set.
+    """
+    if _NO_NETWORK or not _AUTO_INSTALL:
+        return
+    required = ("traitlets",)
+    missing = (name for name in required if importlib.util.find_spec(name) is None)
+    for name in missing:
+        _try_install_and_import(name)
+
+
+patch_requires_backends_autoinstall()  # import-time side effect: wraps transformers' requires_backends (if importable)
+patch_check_imports_autoinstall()  # import-time side effect: wraps transformers' check_imports (if importable)
+_ensure_notebook_chain()  # import-time side effect: may invoke the installer for traitlets