diff --git a/.github/workflows/studio-backend-ci.yml b/.github/workflows/studio-backend-ci.yml deleted file mode 100644 index 5a858888e7..0000000000 --- a/.github/workflows/studio-backend-ci.yml +++ /dev/null @@ -1,200 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-only -# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. - -# Runs the existing studio/backend/tests/ suite (~860 tests, all CPU-friendly) -# on every PR that touches the backend or unsloth library. Until this lands, -# none of those tests run automatically. Verified locally on Python 3.13 with -# the surgical exclusions below: 861 pass, 4 skipped. -# -# Exclusions: -# - tests/test_studio_api.py: end-to-end against a live model + GGUF download, -# too heavy for free runners. Run separately when GPU CI is available. -# - -k 'not llama_cpp_load_progress_live': spawns a real llama.cpp process, -# not appropriate for CPU-only runners. -# -# ruff is non-blocking initially; remove `|| true` once the backend lints clean. - -name: Backend CI - -on: - pull_request: - paths: - - 'studio/**' - - 'unsloth/**' - - 'unsloth_cli/**' - - 'tests/**' - - 'pyproject.toml' - - '.github/workflows/studio-backend-ci.yml' - push: - branches: [main, pip] - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - pytest: - name: (Python ${{ matrix.python }}) - runs-on: ubuntu-latest - timeout-minutes: 15 - strategy: - fail-fast: false - matrix: - python: ['3.10', '3.11', '3.12', '3.13'] - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v5 - with: - python-version: '${{ matrix.python }}' - cache: 'pip' - - - name: Install backend test dependencies (CPU only) - run: | - python -m pip install --upgrade pip - # Studio's declared backend deps: - pip install -r studio/backend/requirements/studio.txt - # Extras that studio.txt does not list but the import chain needs - # (python-multipart for FastAPI form/file uploads, sqlalchemy/cryptography - # for the auth DB, yaml/jinja2 for utils.models.model_config, etc.): - pip install \ - python-multipart aiofiles sqlalchemy cryptography \ - pyyaml jinja2 mammoth unpdf requests \ - 'numpy<3' pytest pytest-asyncio httpx - # Torch CPU + transformers are required by a chunk of the backend test - # suite (gpu_selection, kv_cache_estimation, utils). CPU-only torch - # keeps the install ~250 MB / ~1 min on a clean runner. - pip install --index-url https://download.pytorch.org/whl/cpu 'torch>=2.4,<2.11' - pip install 'transformers>=4.51,<5.5' - - - name: Backend tests - working-directory: studio/backend - # Locally validated against this dep set: 831 passed, 5 skipped, 35 deselected. - # Deselections (all environment-specific, would never pass on a GPU-less - # `ubuntu-latest` runner regardless of code correctness): - # - llama_cpp_load_progress_live: spawns a real llama.cpp process - # - TestGpuAutoSelection / TestPreSpawnGpuResolution / TestPerGpuFitGuardAllCounts: - # require live transformers config introspection on real GPUs - # - TestTransformersIntrospection: same - # - test_returns_cuda_when_cuda_available / test_calls_cuda_cache_when_cuda: - # assume CUDA-capable GPU - run: | - python -m pytest tests/ -q --tb=short \ - --ignore=tests/test_studio_api.py \ - -k 'not llama_cpp_load_progress_live and not TestGpuAutoSelection and not TestPreSpawnGpuResolution and not TestPerGpuFitGuardAllCounts and not TestTransformersIntrospection and not test_returns_cuda_when_cuda_available and not test_calls_cuda_cache_when_cuda' - - repo-cpu-tests: - # Auto-discover everything under tests/ that is not GPU-bound by - # design. New tests added in covered directories are picked up - # without a workflow edit. Locally validated: 779 passed, 11 - # skipped, 23 deselected. tests/conftest.py (mirroring unsloth-zoo - # PR #624) pre-loads unsloth_zoo.device_type and unsloth.device_type - # under a mocked torch.cuda.is_available so the unsloth import - # chain succeeds on CPU. - name: Repo tests (CPU) - runs-on: ubuntu-latest - timeout-minutes: 10 - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-python@v5 - with: - python-version: '3.12' - cache: 'pip' - - - name: Install deps (shared shape with backend pytest job) - run: | - python -m pip install --upgrade pip - pip install -r studio/backend/requirements/studio.txt - pip install \ - python-multipart aiofiles sqlalchemy cryptography \ - pyyaml jinja2 mammoth unpdf requests typer \ - 'numpy<3' pytest pytest-asyncio httpx - # torchvision is needed because unsloth_zoo.vision_utils imports - # it at module scope and is reached via unsloth.models._utils. - pip install --index-url https://download.pytorch.org/whl/cpu \ - 'torch>=2.4,<2.11' 'torchvision<0.26' - pip install 'transformers>=4.51,<5.5' - # bitsandbytes is a hard import in unsloth/models/_utils.py. - # Recent versions ship a CPU build so it installs on a free - # Linux runner; the kernels still raise on use, but import - # succeeds and the package collects. - pip install 'bitsandbytes>=0.45' - # unsloth.device_type imports unsloth_zoo.utils.Version at module - # scope, so the conftest harness needs unsloth_zoo on the path - # even though it is an optional dep of unsloth. - pip install 'unsloth_zoo>=2026.5.1' - pip install -e . --no-deps - - - name: Repo tests (CPU, auto-discovered) - env: - # tests/python/* import install_python_stack from studio/. - PYTHONPATH: ${{ github.workspace }}/studio - # Skip lazy compilation work the unsloth import chain wants to - # do at import time on a real GPU. - UNSLOTH_COMPILE_DISABLE: '1' - # --ignore: GPU-bound directories (qlora and saving need real - # weights / GPU; tests/sh is a shell suite the next step - # handles; tests/utils is a helpers folder, not tests). - # State-sensitive hardware-spoofing files are pulled out and run - # in isolation in the next step because they mutate - # hardware.py module globals (IS_ROCM / DEVICE) and pollute - # downstream tests. - # -m: honour markers already declared in tests/python/conftest.py - # (`server` = needs studio venv, `e2e` = needs network). - # --deselect: two registry tests that hit huggingface_hub for - # live model existence checks; they belong on a network job. - run: | - python -m pytest tests/ -q --tb=short \ - --ignore=tests/qlora \ - --ignore=tests/saving \ - --ignore=tests/utils \ - --ignore=tests/sh \ - --ignore=tests/studio/test_hardware_dispatch_matrix.py \ - --ignore=tests/studio/test_is_mlx_dispatch_gate.py \ - -m 'not server and not e2e' \ - --deselect tests/test_model_registry.py::test_model_registration \ - --deselect tests/test_model_registry.py::test_all_model_registration - - - name: Hardware-spoof tests (state-sensitive, run in isolation) - env: - PYTHONPATH: ${{ github.workspace }}/studio - UNSLOTH_COMPILE_DISABLE: '1' - # These two files mutate hardware.py module globals at runtime - # via the spoof fixtures, which leaks state into any other test - # that imports hardware. Run them in their own pytest invocation - # so the leak does not cross file boundaries. - run: | - python -m pytest -q --tb=short \ - tests/studio/test_hardware_dispatch_matrix.py \ - tests/studio/test_is_mlx_dispatch_gate.py - - - name: Shell installer tests - # Subset that does not depend on a writable / pristine install.sh - # tree; test_install_host_defaults.sh checks install.ps1 layout - # which has drifted (separate followup). - run: | - set -e - for s in \ - tests/sh/test_get_torch_index_url.sh \ - tests/sh/test_mac_intel_compat.sh \ - tests/sh/test_tauri_install_exit_order.sh \ - tests/sh/test_torch_constraint.sh; do - echo "::group::$s" - bash "$s" - echo "::endgroup::" - done - - ruff: - name: Backend ruff lint (non-blocking) - runs-on: ubuntu-latest - timeout-minutes: 5 - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: '3.12' - cache: 'pip' - - run: pip install ruff - - name: ruff check (non-blocking until accumulated drift is cleared) - run: ruff check studio/backend || true diff --git a/.github/workflows/studio-frontend-ci.yml b/.github/workflows/studio-frontend-ci.yml deleted file mode 100644 index 039bd5dd08..0000000000 --- a/.github/workflows/studio-frontend-ci.yml +++ /dev/null @@ -1,108 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-only -# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. - -# Frontend PR gate: lockfile freshness, typecheck, build, and a bundle grep -# that catches the 2026.5.1 chat-history regression at the JS level. -# -# biome runs as non-blocking for now: the codebase currently has accumulated -# ~470 errors and ~1650 warnings against the existing biome config. Surfacing -# the count in CI lets us drive it down without forcing a fleet-wide cleanup -# in the same PR. Drop `continue-on-error` once that number is zero. - -name: Frontend CI - -on: - pull_request: - paths: - - 'studio/frontend/**' - - '.github/workflows/studio-frontend-ci.yml' - push: - branches: [main, pip] - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - build: - name: Frontend build + bundle sanity - runs-on: ubuntu-latest - timeout-minutes: 10 - defaults: - run: - working-directory: studio/frontend - steps: - - uses: actions/checkout@v4 - - # FIXME: drop this step once @assistant-ui/* and assistant-stream - # leave 0.x -- on 1.x, caret ranges are conventional. Until then, - # every 0.minor on this surface is a SemVer-major (this is exactly - # how 2026.5.1 shipped a broken chat runtime: ^0.12.19 quietly - # resolved to 0.12.28). - - name: '@assistant-ui must be pinned exactly (no caret/tilde)' - working-directory: ${{ github.workspace }} - run: | - set -e - if grep -nE '"(@assistant-ui/[a-z-]+|assistant-stream)":[[:space:]]*"[\^~]' studio/frontend/package.json; then - echo "::error file=studio/frontend/package.json::These packages must be pinned to exact versions until they leave 0.x. Drop the leading ^ or ~." - exit 1 - fi - echo "All assistant-ui packages are pinned exactly." - - - uses: actions/setup-node@v4 - with: - node-version: '22' - cache: 'npm' - cache-dependency-path: studio/frontend/package-lock.json - - - name: Lockfile must agree with package.json (npm ci is strict) - run: npm ci --no-fund --no-audit - - - name: npm ci must not have modified the working tree - working-directory: ${{ github.workspace }} - run: | - if ! git diff --quiet -- studio/frontend; then - echo "::error::npm ci modified files; commit the updated lockfile" - git status -- studio/frontend - exit 1 - fi - - - name: Typecheck - run: npm run typecheck - - - name: Build - run: npm run build - - - name: Built bundle must not contain Studio's unstable_Provider call site - run: | - set -e - JS=$(ls dist/assets/index-*.js | head -1) - HITS=$(grep -c 'unstable_Provider:' "$JS" || echo 0) - echo "main bundle: $JS" - echo "unstable_Provider: hits=$HITS (assistant-ui internals contribute up to 3)" - if [ "$HITS" -gt 3 ]; then - echo "::error file=studio/frontend/src/features/chat/runtime-provider.tsx::Studio bundle still passes unstable_Provider through useRemoteThreadListRuntime; this is the 2026.5.1 chat-history regression. Pass adapters directly into useLocalRuntime instead." - exit 1 - fi - - - name: Bundle size budget (75 MB) - run: | - SIZE=$(du -sb dist | cut -f1) - BUDGET=$((75 * 1024 * 1024)) - echo "dist size: $SIZE bytes ($((SIZE/1024/1024)) MB), budget: $BUDGET bytes (75 MB)" - if [ "$SIZE" -gt "$BUDGET" ]; then - echo "::error::studio/frontend/dist/ exceeded the 75 MB budget. Drop dead deps (e.g. the unused next dep) or split chunks." - exit 1 - fi - - - name: Biome (non-blocking until accumulated drift is cleared) - continue-on-error: true - run: npm run biome:check - - - name: Upload built dist on failure - if: failure() - uses: actions/upload-artifact@v4 - with: - name: studio-frontend-dist - path: studio/frontend/dist - retention-days: 3 diff --git a/.github/workflows/studio-inference-smoke.yml b/.github/workflows/studio-inference-smoke.yml deleted file mode 100644 index 8efe072d28..0000000000 --- a/.github/workflows/studio-inference-smoke.yml +++ /dev/null @@ -1,185 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-only -# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. - -# End-to-end smoke: install Studio via install.sh --local --no-torch, download -# a tiny GGUF, boot Studio, log in, change password, load the model, send a -# chat completion, assert a non-empty response. Only workflow that tests "the -# app actually works". -# -# Model: Qwen3.5-2B UD-IQ3_XXS (~890 MiB) -- small enough that the cache miss -# is cheap and inference fits in the 25 min CPU-runner budget. GGUF is cached -# across runs via actions/cache. - -name: Studio GGUF CI - -on: - pull_request: - paths: - - 'studio/**' - - 'unsloth/**' - - 'unsloth_cli/**' - - 'install.sh' - - 'pyproject.toml' - - '.github/workflows/studio-inference-smoke.yml' - push: - branches: [main, pip] - # Manual trigger for pre-warming the GGUF cache on main, or re-running - # against an arbitrary branch without pushing a no-op commit. - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -env: - GGUF_REPO: unsloth/Qwen3.5-2B-GGUF - GGUF_FILE: Qwen3.5-2B-UD-IQ3_XXS.gguf - STUDIO_PORT: '18888' - -jobs: - inference: - name: Studio boots, loads a GGUF, answers a chat completion - runs-on: ubuntu-latest - timeout-minutes: 25 - steps: - - uses: actions/checkout@v4 - - - name: Linux dependencies for llama.cpp prebuilt - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends \ - libcurl4-openssl-dev libssl-dev jq - - - uses: actions/setup-node@v4 - with: - node-version: '22' - cache: 'npm' - cache-dependency-path: studio/frontend/package-lock.json - - - uses: actions/setup-python@v5 - with: - python-version: '3.12' - cache: 'pip' - - - name: Cache GGUF model file - id: cache-gguf - uses: actions/cache@v4 - with: - path: gguf-cache - key: ${{ runner.os }}-gguf-${{ env.GGUF_REPO }}-${{ env.GGUF_FILE }}-v1 - - - name: Download GGUF if cache miss - if: steps.cache-gguf.outputs.cache-hit != 'true' - run: | - # huggingface-cli was deprecated in huggingface_hub 1.13; the new CLI is `hf`. - python -m pip install --upgrade huggingface_hub hf_transfer - mkdir -p gguf-cache - HF_HUB_ENABLE_HF_TRANSFER=1 \ - hf download "$GGUF_REPO" "$GGUF_FILE" --local-dir gguf-cache - - - name: Install Studio (--local, --no-torch keeps the install lean) - run: | - mkdir -p logs - set -o pipefail - bash install.sh --local --no-torch 2>&1 | tee logs/install.log - - - name: Assert llama.cpp prebuilt was installed (no source-build fallback) - # ubuntu-latest is CPU-only x86_64, so studio/setup.sh should route - # to ggml-org/llama.cpp and grab bin-ubuntu-x64.tar.gz. A source - # build here means the routing regressed. - run: | - if grep -q "falling back to source build" logs/install.log; then - echo "::error::llama.cpp prebuilt path failed on ubuntu-latest. studio/setup.sh routing regressed; CPU-only Linux x86_64 should hit ggml-org/llama.cpp's bin-ubuntu-x64.tar.gz." - grep -E "llama-prebuilt|llama.cpp" logs/install.log | tail -60 - exit 1 - fi - if ! grep -qE "prebuilt installed and validated|prebuilt up to date and validated" logs/install.log; then - echo "::error::install.log does not contain the success marker for the llama.cpp prebuilt path. Did setup.sh skip the prebuilt install?" - grep -E "llama-prebuilt|llama.cpp" logs/install.log | tail -60 - exit 1 - fi - echo "llama.cpp prebuilt path used successfully" - - - name: Reset auth + start Studio in the background - run: | - unsloth studio reset-password - mkdir -p logs - UNSLOTH_API_ONLY=1 unsloth studio -H 127.0.0.1 -p "$STUDIO_PORT" \ - > logs/studio.log 2>&1 & - echo "STUDIO_PID=$!" >> "$GITHUB_ENV" - - - name: Wait for /api/health - run: | - for i in $(seq 1 60); do - if curl -fs "http://127.0.0.1:${STUDIO_PORT}/api/health" > /tmp/health.json; then - echo "ready after ${i}s" - cat /tmp/health.json - jq -e '.status == "healthy"' /tmp/health.json - exit 0 - fi - sleep 1 - done - echo "Studio did not become healthy in 60s" - tail -200 logs/studio.log - exit 1 - - - name: Login + change bootstrap password - run: | - PW=$(cat ~/.unsloth/studio/auth/.bootstrap_password) - NEW="CIPasswordSmoke12345!" - TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \ - -H 'content-type: application/json' \ - -d "{\"username\":\"unsloth\",\"password\":\"$PW\"}" | jq -r .access_token) - curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/change-password" \ - -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \ - -d "{\"current_password\":\"$PW\",\"new_password\":\"$NEW\"}" > /dev/null - # Re-login to clear must_change_password flag. - NEW_TOKEN=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/auth/login" \ - -H 'content-type: application/json' \ - -d "{\"username\":\"unsloth\",\"password\":\"$NEW\"}" | jq -r .access_token) - echo "TOKEN=$NEW_TOKEN" >> "$GITHUB_ENV" - - - name: Load the GGUF into Studio - run: | - GGUF_PATH="$GITHUB_WORKSPACE/gguf-cache/${GGUF_FILE}" - ls -lh "$GGUF_PATH" - curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/inference/load" \ - -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \ - --max-time 600 \ - -d "{\"model_path\":\"$GGUF_PATH\",\"is_lora\":false,\"max_seq_length\":2048}" \ - | jq '{status, display_name, is_gguf, context_length}' - - - name: Send a chat completion + assert non-empty response - run: | - RESP=$(curl -fs -X POST "http://127.0.0.1:${STUDIO_PORT}/api/inference/chat/completions" \ - -H "Authorization: Bearer $TOKEN" -H 'content-type: application/json' \ - --max-time 900 \ - -d '{ - "messages":[{"role":"user","content":"Say hello in one short sentence."}], - "max_tokens":40, - "stream":false - }') - echo "raw response: $RESP" - CONTENT=$(echo "$RESP" | jq -r '.choices[0].message.content // empty') - echo "model response: $CONTENT" - if [ -z "$CONTENT" ]; then - echo "::error::Empty assistant response from Studio" - exit 1 - fi - - - name: Stop Studio - if: always() - run: | - kill "${STUDIO_PID}" || true - sleep 2 - ss -tln | grep ":${STUDIO_PORT}" || true - - - name: Upload Studio + install logs on failure - if: failure() - uses: actions/upload-artifact@v4 - with: - name: studio-inference-log - path: | - logs/studio.log - logs/install.log - retention-days: 7 diff --git a/.github/workflows/studio-tauri-smoke.yml b/.github/workflows/studio-tauri-smoke.yml deleted file mode 100644 index fcc9c8d963..0000000000 --- a/.github/workflows/studio-tauri-smoke.yml +++ /dev/null @@ -1,105 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-only -# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. - -# PR-time smoke for the Tauri desktop wrapper. Builds the frontend and the -# Tauri Linux debug binary, with no codesigning. Catches: -# - tauri.conf.json drift -# - src-tauri Cargo.toml or rust source breakage -# - Tauri CLI version drift (we pin 2.10.1, matching release-desktop.yml) -# - frontend output not picked up by Tauri's distDir -# -# Linux-only on a free `ubuntu-latest` runner. Mac and Windows desktop builds -# stay in release-desktop.yml (manual `workflow_dispatch`) because they need -# code-signing secrets and ~30 min of runner time each. - -name: Studio Tauri CI - -on: - pull_request: - paths: - - 'studio/frontend/**' - - 'studio/src-tauri/**' - - '.github/workflows/studio-tauri-smoke.yml' - push: - branches: [main, pip] - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - linux-debug-build: - name: Tauri Linux debug build (no codesign) - runs-on: ubuntu-22.04 - timeout-minutes: 25 - steps: - - uses: actions/checkout@v4 - - - name: Linux native deps for Tauri / WebKit2GTK - run: | - sudo apt-get update - sudo apt-get install -y \ - libwebkit2gtk-4.1-dev libayatana-appindicator3-dev \ - librsvg2-dev libxdo-dev libssl-dev patchelf - - - uses: actions/setup-node@v4 - with: - node-version: '24' - cache: 'npm' - cache-dependency-path: studio/frontend/package-lock.json - - - uses: dtolnay/rust-toolchain@stable - - - uses: swatinem/rust-cache@v2 - with: - workspaces: studio/src-tauri -> target - - - name: Install pinned Tauri CLI (matches release-desktop.yml) - run: npm install --save-dev --prefix studio @tauri-apps/cli@2.10.1 - - - name: Verify pinned Tauri CLI version - run: | - out="$(npx --prefix studio tauri --version)" - echo "$out" - [ "$out" = "tauri-cli 2.10.1" ] || { echo "::error::expected tauri-cli 2.10.1, got $out"; exit 1; } - - - name: Frontend build (npm ci, vite) - working-directory: studio/frontend - run: | - npm ci --no-fund --no-audit - npm run build - test -f dist/index.html - - - name: Tauri debug build (Linux, no bundle, no codesign) - # `--debug` + `--no-bundle` keeps this lean: compiles the Rust crate, - # confirms the frontend dist is wired into Tauri, but skips the AppImage - # / .deb production. Code signing is irrelevant because we never produce - # a distributable artifact. - env: - TAURI_SIGNING_PRIVATE_KEY: '' - TAURI_SIGNING_PRIVATE_KEY_PASSWORD: '' - run: npx --prefix studio tauri build --debug --no-bundle - - - name: Inspect produced binary - run: | - BIN=$(find studio/src-tauri/target/debug -maxdepth 1 -type f -executable 2>/dev/null \ - | grep -Ev '\.(d|so|dylib|dll)$' \ - | grep -Ev '/(deps|build|examples)$' \ - | head -1) - echo "binary: $BIN" - if [ -z "$BIN" ]; then - echo "::error::Tauri debug binary not produced" - ls -la studio/src-tauri/target/debug/ || true - exit 1 - fi - file "$BIN" - du -h "$BIN" - - - uses: actions/upload-artifact@v4 - if: failure() - with: - name: tauri-debug-build - path: | - studio/src-tauri/target/debug - studio/frontend/dist - retention-days: 3 diff --git a/.github/workflows/wheel-smoke.yml b/.github/workflows/wheel-smoke.yml deleted file mode 100644 index 080a6bb261..0000000000 --- a/.github/workflows/wheel-smoke.yml +++ /dev/null @@ -1,124 +0,0 @@ -# SPDX-License-Identifier: AGPL-3.0-only -# Copyright 2026-present the Unsloth AI Inc. team. All rights reserved. - -# Builds the PyPI wheel from the PR branch, then verifies the built wheel -# actually contains what we expect to ship and does NOT contain the broken -# Studio bundle that 2026.5.1 published. This is the single workflow that -# would have blocked the 2026.5.1 release before twine upload. -# -# Verified locally end-to-end against this branch: -# - python -m build produces unsloth--py3-none-any.whl in 13s -# - wheel content sanity passes: -# lockfile shipped, frontend dist shipped, -# no node_modules in wheel, no bun.lock in wheel, -# main bundle has unstable_Provider hits=1 (assistant-ui internals only). -# - Studio backend imports cleanly from the installed wheel with the -# lightweight dep set below. - -name: Wheel CI - -on: - pull_request: - paths: - - 'pyproject.toml' - - 'studio/**' - - 'unsloth/**' - - 'unsloth_cli/**' - - '.github/workflows/wheel-smoke.yml' - push: - branches: [main, pip] - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - wheel: - name: Wheel build + content sanity + import smoke - runs-on: ubuntu-latest - timeout-minutes: 15 - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-node@v4 - with: - node-version: '22' - cache: 'npm' - cache-dependency-path: studio/frontend/package-lock.json - - - uses: actions/setup-python@v5 - with: - python-version: '3.12' - - - name: Build frontend - run: | - cd studio/frontend - npm ci --no-fund --no-audit - npm run build - - - name: Build wheel + sdist - run: | - python -m pip install --upgrade pip build - rm -rf dist build ./*.egg-info - python -m build - - - name: Wheel content sanity - run: | - python - <<'PY' - import zipfile, glob, sys - w = glob.glob("dist/unsloth-*.whl") - if not w: - print("FAIL: no wheel produced"); sys.exit(2) - w = w[0] - print(f"wheel: {w}") - with zipfile.ZipFile(w) as z: - n = z.namelist() - checks = { - "lockfile shipped": any(s.endswith("studio/frontend/package-lock.json") for s in n), - "frontend dist shipped": any(s.endswith("studio/frontend/dist/index.html") for s in n), - "no node_modules": not any("studio/frontend/node_modules/" in s for s in n), - "no bun.lock": not any(s.endswith("studio/frontend/bun.lock") for s in n), - } - js = [s for s in n - if "studio/frontend/dist/assets/" in s - and s.endswith(".js") - and "/index-" in s] - if not js: - print("FAIL: no main bundle index-*.js in wheel"); sys.exit(2) - data = z.read(js[0]).decode("utf-8", "replace") - hits = data.count("unstable_Provider:") - print(f"main bundle: {js[0]}") - print(f"unstable_Provider hits: {hits} (>=4 indicates 2026.5.1 regression)") - checks["bundle has no Studio unstable_Provider call site"] = (hits < 4) - - print() - for k, v in checks.items(): - print(f" [{'PASS' if v else 'FAIL'}] {k}") - sys.exit(0 if all(checks.values()) else 1) - PY - - - name: Studio backend import smoke - # Imports `studio.backend.main:app` from the freshly-installed wheel in - # a clean venv. This catches the class of bug that 2026.5.1 shipped with: - # frontend dist missing, package-lock.json missing, or the wheel's Python - # source tree broken in a way that surfaces only at app construction time. - run: | - python -m venv /tmp/v - /tmp/v/bin/pip install --upgrade pip - /tmp/v/bin/pip install -r studio/backend/requirements/studio.txt - /tmp/v/bin/pip install \ - python-multipart aiofiles sqlalchemy cryptography \ - pyyaml jinja2 mammoth unpdf requests \ - 'numpy<3' - /tmp/v/bin/pip install --no-deps dist/unsloth-*.whl - # Run from /tmp so Python imports the installed package, not the source tree. - cd /tmp - /tmp/v/bin/python -c "from studio.backend.main import app; print('Studio backend OK:', app.title)" - - - name: Upload wheel on failure - if: failure() - uses: actions/upload-artifact@v4 - with: - name: unsloth-wheel - path: dist/ - retention-days: 7 diff --git a/unsloth/models/llama.py b/unsloth/models/llama.py index d39f2588ef..93dddcc76d 100644 --- a/unsloth/models/llama.py +++ b/unsloth/models/llama.py @@ -2065,6 +2065,19 @@ def unsloth_fast_generate( ): # If the model starts out in training mode, restore training mode after generation restore_training_mode = self.training + # why: snapshot the actual GC mode value (e.g. "unsloth") before for_inference + # clears it, so the post-generate restore preserves the caller's configured GC + # mode rather than collapsing it to a plain bool. + use_gradient_checkpointing = next( + ( + v + for v in ( + getattr(m, "gradient_checkpointing", False) for m in self.modules() + ) + if v + ), + False, + ) FastLlamaModel.for_inference(self) @@ -2138,7 +2151,10 @@ def unsloth_fast_generate( # pass if restore_training_mode: - FastLlamaModel.for_training(self) + FastLlamaModel.for_training( + self, + use_gradient_checkpointing=use_gradient_checkpointing, + ) return output diff --git a/unsloth/models/rl.py b/unsloth/models/rl.py index ac9b35a822..b00db26438 100755 --- a/unsloth/models/rl.py +++ b/unsloth/models/rl.py @@ -186,6 +186,21 @@ def unwrap_model_for_generation( @contextmanager def unsloth_unwrap_model_for_generation(model, *args, **kwargs): + # why: snapshot before TRL's unwrap context manager, which calls + # gradient_checkpointing_disable() before yielding; preserve the actual + # mode value (e.g. "unsloth") rather than collapsing it to a bool, so + # the finally restore matches the caller's configured GC mode. + use_gradient_checkpointing = next( + ( + v + for v in ( + getattr(m, "gradient_checkpointing", False) + for m in model.modules() + ) + if v + ), + False, + ) with unwrap_model_for_generation(model, *args, **kwargs) as unwrapped_model: # Put the model in inference mode. FastLanguageModel.for_inference(model) @@ -207,7 +222,10 @@ def generate_with_clone(*args, **kwargs): finally: # Restore generate and return unwrapped_model.generate = original_generate - FastLanguageModel.for_training(model) + FastLanguageModel.for_training( + model, + use_gradient_checkpointing=use_gradient_checkpointing, + ) from transformers import Trainer from transformers.trainer_pt_utils import nested_detach diff --git a/unsloth/models/rl_replacements.py b/unsloth/models/rl_replacements.py index 0f10847282..9ffe40b555 100755 --- a/unsloth/models/rl_replacements.py +++ b/unsloth/models/rl_replacements.py @@ -650,7 +650,7 @@ def grpo_trainer__generate_and_score_completions(function_name, function): # Left pad prompt before calculation old and ref hidden states left_pad_tokens_per_prompt = calculate_pad_tokens_in_prompt(prompt_completion_ids, logits_to_keep, self.processing_class.pad_token_id) max_left_pad = torch.max(left_pad_tokens_per_prompt).item() - self.model.for_training()""" + self.model.for_training(use_gradient_checkpointing=getattr(self.args, 'gradient_checkpointing', True))""" function = function.replace(line_to_replace, replacement_lines)