From c6769b0f734955c7f38c049e1525ff612036f1b5 Mon Sep 17 00:00:00 2001
From: aniruddh-alt <aniruddhr04@gmail.com>
Date: Wed, 3 Jun 2026 15:27:41 -0700
Subject: [PATCH 1/4] feat(gemma4): add Gemma 4 27B MoE LoRA recipe
 (editable-install test)

---
 configs/recipes/gemma4/README.md              |  8 +-
 .../recipes/gemma4/sft/27b_lora/gcp_job.yaml  | 53 +++++++++++
 .../recipes/gemma4/sft/27b_lora/train.yaml    | 93 +++++++++++++++++++
 3 files changed, 153 insertions(+), 1 deletion(-)
 create mode 100644 configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml
 create mode 100644 configs/recipes/gemma4/sft/27b_lora/train.yaml

diff --git a/configs/recipes/gemma4/README.md b/configs/recipes/gemma4/README.md
index d6439a627..b5d5231e0 100644
--- a/configs/recipes/gemma4/README.md
+++ b/configs/recipes/gemma4/README.md
@@ -8,7 +8,7 @@ Configs for Google's Gemma 4 model family. See the [Hugging Face announcement](h
   - [google/gemma-4-E2B-it](https://huggingface.co/google/gemma-4-E2B-it) (~5B) — **LoRA config available**
   - [google/gemma-4-E4B-it](https://huggingface.co/google/gemma-4-E4B-it) (~8B) — **FFT + LoRA configs available**
 - Larger (image + text, 256K context)
-  - [google/gemma-4-26B-A4B-it](https://huggingface.co/google/gemma-4-26B-A4B-it) (MoE, 27B)
+  - [google/gemma-4-26B-A4B-it](https://huggingface.co/google/gemma-4-26B-A4B-it) (MoE, 27B) — **LoRA config available**
   - [google/gemma-4-31B-it](https://huggingface.co/google/gemma-4-31B-it) (dense, 31B)
 
 Gemma 4 requires accepting the model license on Hugging Face before downloading.
@@ -65,3 +65,9 @@ To launch Gemma 4 E4B LoRA training on a remote GCP A100 cluster:
 ```shell
 oumi launch up -c oumi://configs/recipes/gemma4/sft/e4b_lora/gcp_job.yaml --cluster gemma4-e4b-lora
 ```
+
+To launch Gemma 4 27B (MoE) LoRA training on a remote GCP 8x A100 cluster:
+
+```shell
+oumi launch up -c configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml --cluster gemma4-27b-lora
+```
diff --git a/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml b/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml
new file mode 100644
index 000000000..25223b7aa
--- /dev/null
+++ b/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml
@@ -0,0 +1,53 @@
+# Job config to LoRA tune Gemma 4 27B (Mixture-of-Experts).
+#
+# NOTE: This recipe installs Oumi in EDITABLE mode from the synced working dir
+# (`-e '.[gpu]'`) and runs the LOCAL config path (no `oumi://` prefix). This is
+# required while the Gemma 4 27B recipe and the `lora_exclude_modules` feature
+# are pre-release and not yet on `main`.
+#
+# Requirements:
+#   - Set up SkyPilot GCP: https://oumi.ai/docs/en/latest/user_guides/launch/launch.html#setup
+#   - apache-2.0 license, no gating: https://huggingface.co/google/gemma-4-26B-A4B-it
+#   - Log into WandB (`wandb login`) or disable `enable_wandb`
+#
+# Usage:
+#   oumi launch up -c configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml --cluster gemma4-27b-lora
+#
+# See Also:
+#   - Documentation: https://oumi.ai/docs/en/latest/user_guides/launch/launch.html
+#   - Config class: oumi.core.configs.JobConfig
+#   - Config source: https://github.com/oumi-ai/oumi/blob/main/src/oumi/core/configs/job_config.py
+#   - Other job configs: configs/**/*job.yaml
+
+name: gemma4-27b-lora
+
+resources:
+  cloud: gcp
+  accelerators: "A100:8"
+  use_spot: true
+  region: europe-west4  # 40GB A100s available here with abundant spot quota.
+  disk_size: 500  # Disk size in GBs
+
+working_dir: .
+
+file_mounts:
+  ~/.netrc: ~/.netrc  # WandB credentials
+
+envs:
+  WANDB_PROJECT: oumi-train
+  OUMI_RUN_NAME: gemma4-27b.lora
+
+setup: |
+  set -e
+  pip install uv && SETUPTOOLS_SCM_PRETEND_VERSION_FOR_OUMI=0.0.0.dev0 uv pip install --system -e '.[gpu]' hf_transfer
+
+run: |
+  set -e  # Exit if any command failed.
+  source ./configs/examples/misc/sky_init.sh
+
+  set -x
+  oumi distributed torchrun -m oumi train \
+      -c configs/recipes/gemma4/sft/27b_lora/train.yaml \
+      --training.run_name "${OUMI_RUN_NAME}.${SKYPILOT_TASK_ID}"
+
+  echo "Node ${SKYPILOT_NODE_RANK} is all done!"
diff --git a/configs/recipes/gemma4/sft/27b_lora/train.yaml b/configs/recipes/gemma4/sft/27b_lora/train.yaml
new file mode 100644
index 000000000..208538bbe
--- /dev/null
+++ b/configs/recipes/gemma4/sft/27b_lora/train.yaml
@@ -0,0 +1,93 @@
+# LoRA SFT config for Gemma 4 27B Instruct (Mixture-of-Experts).
+#
+# Model highlights:
+#   - 26.5B total parameters (~4B active per token, "A4B"), MoE multimodal model
+#     from Google (Gemma 4 Larger series)
+#   - 256K context length; image + text inputs
+#   - LoRA is scoped to the text transformer only. The vision tower and the
+#     multimodal projector are excluded via `lora_exclude_modules` below; they
+#     share projection-like names with the text model but PEFT cannot adapt them.
+#     The standard projection targets suffix-match the expert MLPs; the MoE
+#     router ("gate") is left untouched.
+#
+# Requirements:
+#   - transformers >= 5.10.0
+#   - peft (installed automatically with oumi)
+#   - apache-2.0 license, no gating: https://huggingface.co/google/gemma-4-26B-A4B-it
+#   - Log into WandB (`wandb login`) or disable `enable_wandb`
+#
+# Usage:
+#   oumi distributed torchrun -m oumi train -c oumi://configs/recipes/gemma4/sft/27b_lora/train.yaml
+#
+# See Also:
+#   - Documentation: https://oumi.ai/docs/en/latest/user_guides/train/train.html
+#   - Config class: oumi.core.configs.TrainingConfig
+#   - Config source: https://github.com/oumi-ai/oumi/blob/main/src/oumi/core/configs/training_config.py
+#   - Other training configs: configs/**/*train.yaml
+
+model:
+  model_name: "google/gemma-4-26B-A4B-it"
+  model_max_length: 8192
+  torch_dtype_str: "bfloat16"
+  attn_implementation: "sdpa"
+  trust_remote_code: false
+  enable_liger_kernel: false  # Disabled (may conflict with Gemma output format).
+
+data:
+  train:
+    datasets:
+      - dataset_name: "yahma/alpaca-cleaned"  # 51,760 examples
+
+training:
+  use_peft: true
+  trainer_type: "TRL_SFT"
+  save_final_model: true
+  num_train_epochs: 1
+  per_device_train_batch_size: 1
+  gradient_accumulation_steps: 8
+  max_grad_norm: 1.0
+
+  enable_gradient_checkpointing: true
+  gradient_checkpointing_kwargs:
+    use_reentrant: false
+  ddp_find_unused_parameters: true  # LoRA leaves the vision tower frozen.
+  compile: false
+
+  optimizer: "adamw_torch_fused"
+  learning_rate: 2.0e-04
+  lr_scheduler_type: "cosine"
+  warmup_ratio: 0.05
+  weight_decay: 0.01
+
+  dataloader_num_workers: "auto"
+  dataloader_prefetch_factor: 8
+  logging_steps: 5
+  empty_device_cache_steps: 50
+  output_dir: "output/gemma4_27b.lora"
+  include_performance_metrics: true
+  enable_wandb: true
+
+peft:
+  lora_r: 8
+  lora_alpha: 16
+  lora_dropout: 0.05
+  lora_target_modules:
+    - "q_proj"
+    - "k_proj"
+    - "v_proj"
+    - "o_proj"
+    - "gate_proj"
+    - "up_proj"
+    - "down_proj"
+  # Keep LoRA on the text transformer only; exclude the vision tower and the
+  # multimodal projector (image+text model, no audio tower).
+  lora_exclude_modules:
+    - ".*vision_tower.*"
+    - ".*multi_modal_projector.*"
+
+fsdp:
+  enable_fsdp: true
+  sharding_strategy: "FULL_SHARD"
+  forward_prefetch: true
+  auto_wrap_policy: "TRANSFORMER_BASED_WRAP"
+  transformer_layer_cls: "Gemma4TextDecoderLayer"

From 38eb807e4851c4a9ce8fe5e9e8e7581d84245d60 Mon Sep 17 00:00:00 2001
From: aniruddh-alt <aniruddhr04@gmail.com>
Date: Thu, 4 Jun 2026 10:12:22 -0700
Subject: [PATCH 2/4] fix(gemma4): finalize 27B MoE LoRA recipe (released
 install form, accurate transformers/MoE notes)

---
 configs/recipes/gemma4/README.md                 |  2 +-
 configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml | 14 ++++----------
 configs/recipes/gemma4/sft/27b_lora/train.yaml   | 11 ++++++++---
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/configs/recipes/gemma4/README.md b/configs/recipes/gemma4/README.md
index b5d5231e0..3457edcd6 100644
--- a/configs/recipes/gemma4/README.md
+++ b/configs/recipes/gemma4/README.md
@@ -69,5 +69,5 @@ oumi launch up -c oumi://configs/recipes/gemma4/sft/e4b_lora/gcp_job.yaml --clus
 To launch Gemma 4 27B (MoE) LoRA training on a remote GCP 8x A100 cluster:
 
 ```shell
-oumi launch up -c configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml --cluster gemma4-27b-lora
+oumi launch up -c oumi://configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml --cluster gemma4-27b-lora
 ```
diff --git a/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml b/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml
index 25223b7aa..fe38f0aea 100644
--- a/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml
+++ b/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml
@@ -1,17 +1,12 @@
 # Job config to LoRA tune Gemma 4 27B (Mixture-of-Experts).
 #
-# NOTE: This recipe installs Oumi in EDITABLE mode from the synced working dir
-# (`-e '.[gpu]'`) and runs the LOCAL config path (no `oumi://` prefix). This is
-# required while the Gemma 4 27B recipe and the `lora_exclude_modules` feature
-# are pre-release and not yet on `main`.
-#
 # Requirements:
 #   - Set up SkyPilot GCP: https://oumi.ai/docs/en/latest/user_guides/launch/launch.html#setup
 #   - apache-2.0 license, no gating: https://huggingface.co/google/gemma-4-26B-A4B-it
 #   - Log into WandB (`wandb login`) or disable `enable_wandb`
 #
 # Usage:
-#   oumi launch up -c configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml --cluster gemma4-27b-lora
+#   oumi launch up -c oumi://configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml --cluster gemma4-27b-lora
 #
 # See Also:
 #   - Documentation: https://oumi.ai/docs/en/latest/user_guides/launch/launch.html
@@ -24,8 +19,7 @@ name: gemma4-27b-lora
 resources:
   cloud: gcp
   accelerators: "A100:8"
-  use_spot: true
-  region: europe-west4  # 40GB A100s available here with abundant spot quota.
+  use_spot: false
   disk_size: 500  # Disk size in GBs
 
 working_dir: .
@@ -39,7 +33,7 @@ envs:
 
 setup: |
   set -e
-  pip install uv && SETUPTOOLS_SCM_PRETEND_VERSION_FOR_OUMI=0.0.0.dev0 uv pip install --system -e '.[gpu]' hf_transfer
+  pip install uv && uv pip install --system 'oumi[gpu]' hf_transfer  # Quoted: a bare [gpu] is a shell glob.
 
 run: |
   set -e  # Exit if any command failed.
@@ -47,7 +41,7 @@ run: |
 
   set -x
   oumi distributed torchrun -m oumi train \
-      -c configs/recipes/gemma4/sft/27b_lora/train.yaml \
+      -c oumi://configs/recipes/gemma4/sft/27b_lora/train.yaml \
       --training.run_name "${OUMI_RUN_NAME}.${SKYPILOT_TASK_ID}"
 
   echo "Node ${SKYPILOT_NODE_RANK} is all done!"
diff --git a/configs/recipes/gemma4/sft/27b_lora/train.yaml b/configs/recipes/gemma4/sft/27b_lora/train.yaml
index 208538bbe..ad90af664 100644
--- a/configs/recipes/gemma4/sft/27b_lora/train.yaml
+++ b/configs/recipes/gemma4/sft/27b_lora/train.yaml
@@ -7,11 +7,12 @@
 #   - LoRA is scoped to the text transformer only. The vision tower and the
 #     multimodal projector are excluded via `lora_exclude_modules` below; they
 #     share projection-like names with the text model but PEFT cannot adapt them.
-#     The standard projection targets suffix-match the expert MLPs; the MoE
-#     router ("gate") is left untouched.
+#     NOTE: on this MoE the standard gate_proj/up_proj/down_proj names do not
+#     match the (fused) expert MLP modules, so LoRA adapts only the attention
+#     projections (~9.3M params). Adapting the experts needs their module names.
 #
 # Requirements:
-#   - transformers >= 5.10.0
+#   - transformers >= 5.5.4
 #   - peft (installed automatically with oumi)
 #   - apache-2.0 license, no gating: https://huggingface.co/google/gemma-4-26B-A4B-it
 #   - Log into WandB (`wandb login`) or disable `enable_wandb`
@@ -44,6 +45,10 @@ training:
   save_final_model: true
   num_train_epochs: 1
   per_device_train_batch_size: 1
+  # NOTE: gradient accumulation inflates reported loss for Gemma 4 (~4x) due
+  # to a missing `accepts_loss_kwargs = False` on Gemma4ForConditionalGeneration.
+  # Fixed on transformers main but not yet released as of 5.5.4. See
+  # huggingface/transformers#40564 (same bug existed in Gemma 3).
   gradient_accumulation_steps: 8
   max_grad_norm: 1.0
 

From 7db829d7f05880a037b4302f12e82f01a3802de0 Mon Sep 17 00:00:00 2001
From: aniruddh-alt <aniruddhr04@gmail.com>
Date: Fri, 5 Jun 2026 14:06:20 -0700
Subject: [PATCH 3/4] fix(gemma4): accurate LoRA exclude docs + drop inert ddp
 flag (27B)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

README LoRA prose claimed the recipes exclude .*audio_tower.*, but the Larger image+text models (31B/27B) have no audio tower and exclude .*multi_modal_projector.* — generalize the prose to cover both families. Remove ddp_find_unused_parameters from 27b_lora/train.yaml: it is a no-op under FSDP (which this recipe always enables; distributed.py routes the flag only to the DDP wrapper) and its comment was misleading. Reword the header exclusion rationale to match the e4b sibling (Gemma4ClippableLinear).
---
 configs/recipes/gemma4/README.md               | 13 +++++++------
 configs/recipes/gemma4/sft/27b_lora/train.yaml |  6 +++---
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/configs/recipes/gemma4/README.md b/configs/recipes/gemma4/README.md
index 3457edcd6..6a7c7a49f 100644
--- a/configs/recipes/gemma4/README.md
+++ b/configs/recipes/gemma4/README.md
@@ -41,12 +41,13 @@ oumi launch up -c oumi://configs/recipes/gemma4/sft/e4b_full/gcp_job.yaml --clus
 
 ### LoRA Training
 
-LoRA is scoped to the language-model layers only. Gemma 4's vision/audio towers
-use `Gemma4ClippableLinear` wrappers that PEFT cannot adapt, and they share
-projection names (`q_proj`, `v_proj`, ...) with the text model. The recipes target
-the plain projection names and set `lora_exclude_modules: [".*vision_tower.*",
-".*audio_tower.*"]`, which oumi passes to PEFT's `exclude_modules` to keep LoRA off
-the towers.
+LoRA is scoped to the language-model layers only. Gemma 4's non-text modules use
+`Gemma4ClippableLinear` wrappers that PEFT cannot adapt, and they share projection
+names (`q_proj`, `v_proj`, ...) with the text model. The recipes target the plain
+projection names and set `lora_exclude_modules` to keep LoRA off those modules: the
+Efficient (text + image + audio) models exclude `[".*vision_tower.*", ".*audio_tower.*"]`,
+and the Larger (image + text) models exclude `[".*vision_tower.*", ".*multi_modal_projector.*"]`.
+Oumi passes this list to PEFT's `exclude_modules`.
 
 To launch Gemma 4 E4B LoRA training locally (fits a single A100/H100):
 
diff --git a/configs/recipes/gemma4/sft/27b_lora/train.yaml b/configs/recipes/gemma4/sft/27b_lora/train.yaml
index ad90af664..b8e83b0f1 100644
--- a/configs/recipes/gemma4/sft/27b_lora/train.yaml
+++ b/configs/recipes/gemma4/sft/27b_lora/train.yaml
@@ -5,8 +5,9 @@
 #     from Google (Gemma 4 Larger series)
 #   - 256K context length; image + text inputs
 #   - LoRA is scoped to the text transformer only. The vision tower and the
-#     multimodal projector are excluded via `lora_exclude_modules` below; they
-#     share projection-like names with the text model but PEFT cannot adapt them.
+#     multimodal projector use `Gemma4ClippableLinear` wrappers that PEFT cannot
+#     adapt, and share projection names with the text model — so they are excluded
+#     via `lora_exclude_modules` below.
 #     NOTE: on this MoE the standard gate_proj/up_proj/down_proj names do not
 #     match the (fused) expert MLP modules, so LoRA adapts only the attention
 #     projections (~9.3M params). Adapting the experts needs their module names.
@@ -55,7 +56,6 @@ training:
   enable_gradient_checkpointing: true
   gradient_checkpointing_kwargs:
     use_reentrant: false
-  ddp_find_unused_parameters: true  # LoRA leaves the vision tower frozen.
   compile: false
 
   optimizer: "adamw_torch_fused"

From bb809941ebb0eb011aff9db66785e3b5fc9d9b5d Mon Sep 17 00:00:00 2001
From: aniruddh-alt <aniruddhr04@gmail.com>
Date: Fri, 5 Jun 2026 15:21:41 -0700
Subject: [PATCH 4/4] fix(gemma4): correct 27B LoRA license note (Gemma ToU,
 gated)

Gemma 4 is under the Gemma Terms of Use and gated on HF, not apache-2.0/ungated. Match the rest of the repo's wording. Same liberate-bot fix as the sibling 31B PR.
---
 configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml | 2 +-
 configs/recipes/gemma4/sft/27b_lora/train.yaml   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml b/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml
index fe38f0aea..6d924b450 100644
--- a/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml
+++ b/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml
@@ -2,7 +2,7 @@
 #
 # Requirements:
 #   - Set up SkyPilot GCP: https://oumi.ai/docs/en/latest/user_guides/launch/launch.html#setup
-#   - apache-2.0 license, no gating: https://huggingface.co/google/gemma-4-26B-A4B-it
+#   - Gemma license acceptance required: https://huggingface.co/google/gemma-4-26B-A4B-it
 #   - Log into WandB (`wandb login`) or disable `enable_wandb`
 #
 # Usage:
diff --git a/configs/recipes/gemma4/sft/27b_lora/train.yaml b/configs/recipes/gemma4/sft/27b_lora/train.yaml
index b8e83b0f1..ed572e510 100644
--- a/configs/recipes/gemma4/sft/27b_lora/train.yaml
+++ b/configs/recipes/gemma4/sft/27b_lora/train.yaml
@@ -15,7 +15,7 @@
 # Requirements:
 #   - transformers >= 5.5.4
 #   - peft (installed automatically with oumi)
-#   - apache-2.0 license, no gating: https://huggingface.co/google/gemma-4-26B-A4B-it
+#   - Gemma license acceptance required: https://huggingface.co/google/gemma-4-26B-A4B-it
 #   - Log into WandB (`wandb login`) or disable `enable_wandb`
 #
 # Usage: