From c6769b0f734955c7f38c049e1525ff612036f1b5 Mon Sep 17 00:00:00 2001 From: aniruddh-alt Date: Wed, 3 Jun 2026 15:27:41 -0700 Subject: [PATCH 1/4] feat(gemma4): add Gemma 4 27B MoE LoRA recipe (editable-install test) --- configs/recipes/gemma4/README.md | 8 +- .../recipes/gemma4/sft/27b_lora/gcp_job.yaml | 53 +++++++++++ .../recipes/gemma4/sft/27b_lora/train.yaml | 93 +++++++++++++++++++ 3 files changed, 153 insertions(+), 1 deletion(-) create mode 100644 configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml create mode 100644 configs/recipes/gemma4/sft/27b_lora/train.yaml diff --git a/configs/recipes/gemma4/README.md b/configs/recipes/gemma4/README.md index d6439a627..b5d5231e0 100644 --- a/configs/recipes/gemma4/README.md +++ b/configs/recipes/gemma4/README.md @@ -8,7 +8,7 @@ Configs for Google's Gemma 4 model family. See the [Hugging Face announcement](h - [google/gemma-4-E2B-it](https://huggingface.co/google/gemma-4-E2B-it) (~5B) — **LoRA config available** - [google/gemma-4-E4B-it](https://huggingface.co/google/gemma-4-E4B-it) (~8B) — **FFT + LoRA configs available** - Larger (image + text, 256K context) - - [google/gemma-4-26B-A4B-it](https://huggingface.co/google/gemma-4-26B-A4B-it) (MoE, 27B) + - [google/gemma-4-26B-A4B-it](https://huggingface.co/google/gemma-4-26B-A4B-it) (MoE, 27B) — **LoRA config available** - [google/gemma-4-31B-it](https://huggingface.co/google/gemma-4-31B-it) (dense, 31B) Gemma 4 requires accepting the model license on Hugging Face before downloading. @@ -65,3 +65,9 @@ To launch Gemma 4 E4B LoRA training on a remote GCP A100 cluster: ```shell oumi launch up -c oumi://configs/recipes/gemma4/sft/e4b_lora/gcp_job.yaml --cluster gemma4-e4b-lora ``` + +To launch Gemma 4 27B (MoE) LoRA training on a remote GCP 8x A100 cluster: + +```shell +oumi launch up -c configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml --cluster gemma4-27b-lora +``` diff --git a/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml b/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml new file mode 100644 index 000000000..25223b7aa --- /dev/null +++ b/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml @@ -0,0 +1,53 @@ +# Job config to LoRA tune Gemma 4 27B (Mixture-of-Experts). +# +# NOTE: This recipe installs Oumi in EDITABLE mode from the synced working dir +# (`-e '.[gpu]'`) and runs the LOCAL config path (no `oumi://` prefix). This is +# required while the Gemma 4 27B recipe and the `lora_exclude_modules` feature +# are pre-release and not yet on `main`. +# +# Requirements: +# - Set up SkyPilot GCP: https://oumi.ai/docs/en/latest/user_guides/launch/launch.html#setup +# - apache-2.0 license, no gating: https://huggingface.co/google/gemma-4-26B-A4B-it +# - Log into WandB (`wandb login`) or disable `enable_wandb` +# +# Usage: +# oumi launch up -c configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml --cluster gemma4-27b-lora +# +# See Also: +# - Documentation: https://oumi.ai/docs/en/latest/user_guides/launch/launch.html +# - Config class: oumi.core.configs.JobConfig +# - Config source: https://github.com/oumi-ai/oumi/blob/main/src/oumi/core/configs/job_config.py +# - Other job configs: configs/**/*job.yaml + +name: gemma4-27b-lora + +resources: + cloud: gcp + accelerators: "A100:8" + use_spot: true + region: europe-west4 # 40GB A100s available here with abundant spot quota. + disk_size: 500 # Disk size in GBs + +working_dir: . + +file_mounts: + ~/.netrc: ~/.netrc # WandB credentials + +envs: + WANDB_PROJECT: oumi-train + OUMI_RUN_NAME: gemma4-27b.lora + +setup: | + set -e + pip install uv && SETUPTOOLS_SCM_PRETEND_VERSION_FOR_OUMI=0.0.0.dev0 uv pip install --system -e '.[gpu]' hf_transfer + +run: | + set -e # Exit if any command failed. + source ./configs/examples/misc/sky_init.sh + + set -x + oumi distributed torchrun -m oumi train \ + -c configs/recipes/gemma4/sft/27b_lora/train.yaml \ + --training.run_name "${OUMI_RUN_NAME}.${SKYPILOT_TASK_ID}" + + echo "Node ${SKYPILOT_NODE_RANK} is all done!" diff --git a/configs/recipes/gemma4/sft/27b_lora/train.yaml b/configs/recipes/gemma4/sft/27b_lora/train.yaml new file mode 100644 index 000000000..208538bbe --- /dev/null +++ b/configs/recipes/gemma4/sft/27b_lora/train.yaml @@ -0,0 +1,93 @@ +# LoRA SFT config for Gemma 4 27B Instruct (Mixture-of-Experts). +# +# Model highlights: +# - 26.5B total parameters (~4B active per token, "A4B"), MoE multimodal model +# from Google (Gemma 4 Larger series) +# - 256K context length; image + text inputs +# - LoRA is scoped to the text transformer only. The vision tower and the +# multimodal projector are excluded via `lora_exclude_modules` below; they +# share projection-like names with the text model but PEFT cannot adapt them. +# The standard projection targets suffix-match the expert MLPs; the MoE +# router ("gate") is left untouched. +# +# Requirements: +# - transformers >= 5.10.0 +# - peft (installed automatically with oumi) +# - apache-2.0 license, no gating: https://huggingface.co/google/gemma-4-26B-A4B-it +# - Log into WandB (`wandb login`) or disable `enable_wandb` +# +# Usage: +# oumi distributed torchrun -m oumi train -c oumi://configs/recipes/gemma4/sft/27b_lora/train.yaml +# +# See Also: +# - Documentation: https://oumi.ai/docs/en/latest/user_guides/train/train.html +# - Config class: oumi.core.configs.TrainingConfig +# - Config source: https://github.com/oumi-ai/oumi/blob/main/src/oumi/core/configs/training_config.py +# - Other training configs: configs/**/*train.yaml + +model: + model_name: "google/gemma-4-26B-A4B-it" + model_max_length: 8192 + torch_dtype_str: "bfloat16" + attn_implementation: "sdpa" + trust_remote_code: false + enable_liger_kernel: false # Disabled (may conflict with Gemma output format). + +data: + train: + datasets: + - dataset_name: "yahma/alpaca-cleaned" # 51,760 examples + +training: + use_peft: true + trainer_type: "TRL_SFT" + save_final_model: true + num_train_epochs: 1 + per_device_train_batch_size: 1 + gradient_accumulation_steps: 8 + max_grad_norm: 1.0 + + enable_gradient_checkpointing: true + gradient_checkpointing_kwargs: + use_reentrant: false + ddp_find_unused_parameters: true # LoRA leaves the vision tower frozen. + compile: false + + optimizer: "adamw_torch_fused" + learning_rate: 2.0e-04 + lr_scheduler_type: "cosine" + warmup_ratio: 0.05 + weight_decay: 0.01 + + dataloader_num_workers: "auto" + dataloader_prefetch_factor: 8 + logging_steps: 5 + empty_device_cache_steps: 50 + output_dir: "output/gemma4_27b.lora" + include_performance_metrics: true + enable_wandb: true + +peft: + lora_r: 8 + lora_alpha: 16 + lora_dropout: 0.05 + lora_target_modules: + - "q_proj" + - "k_proj" + - "v_proj" + - "o_proj" + - "gate_proj" + - "up_proj" + - "down_proj" + # Keep LoRA on the text transformer only; exclude the vision tower and the + # multimodal projector (image+text model, no audio tower). + lora_exclude_modules: + - ".*vision_tower.*" + - ".*multi_modal_projector.*" + +fsdp: + enable_fsdp: true + sharding_strategy: "FULL_SHARD" + forward_prefetch: true + auto_wrap_policy: "TRANSFORMER_BASED_WRAP" + transformer_layer_cls: "Gemma4TextDecoderLayer" From 38eb807e4851c4a9ce8fe5e9e8e7581d84245d60 Mon Sep 17 00:00:00 2001 From: aniruddh-alt Date: Thu, 4 Jun 2026 10:12:22 -0700 Subject: [PATCH 2/4] fix(gemma4): finalize 27B MoE LoRA recipe (released install form, accurate transformers/MoE notes) --- configs/recipes/gemma4/README.md | 2 +- configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml | 14 ++++---------- configs/recipes/gemma4/sft/27b_lora/train.yaml | 11 ++++++++--- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/configs/recipes/gemma4/README.md b/configs/recipes/gemma4/README.md index b5d5231e0..3457edcd6 100644 --- a/configs/recipes/gemma4/README.md +++ b/configs/recipes/gemma4/README.md @@ -69,5 +69,5 @@ oumi launch up -c oumi://configs/recipes/gemma4/sft/e4b_lora/gcp_job.yaml --clus To launch Gemma 4 27B (MoE) LoRA training on a remote GCP 8x A100 cluster: ```shell -oumi launch up -c configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml --cluster gemma4-27b-lora +oumi launch up -c oumi://configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml --cluster gemma4-27b-lora ``` diff --git a/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml b/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml index 25223b7aa..fe38f0aea 100644 --- a/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml +++ b/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml @@ -1,17 +1,12 @@ # Job config to LoRA tune Gemma 4 27B (Mixture-of-Experts). # -# NOTE: This recipe installs Oumi in EDITABLE mode from the synced working dir -# (`-e '.[gpu]'`) and runs the LOCAL config path (no `oumi://` prefix). This is -# required while the Gemma 4 27B recipe and the `lora_exclude_modules` feature -# are pre-release and not yet on `main`. -# # Requirements: # - Set up SkyPilot GCP: https://oumi.ai/docs/en/latest/user_guides/launch/launch.html#setup # - apache-2.0 license, no gating: https://huggingface.co/google/gemma-4-26B-A4B-it # - Log into WandB (`wandb login`) or disable `enable_wandb` # # Usage: -# oumi launch up -c configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml --cluster gemma4-27b-lora +# oumi launch up -c oumi://configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml --cluster gemma4-27b-lora # # See Also: # - Documentation: https://oumi.ai/docs/en/latest/user_guides/launch/launch.html @@ -24,8 +19,7 @@ name: gemma4-27b-lora resources: cloud: gcp accelerators: "A100:8" - use_spot: true - region: europe-west4 # 40GB A100s available here with abundant spot quota. + use_spot: false disk_size: 500 # Disk size in GBs working_dir: . @@ -39,7 +33,7 @@ envs: setup: | set -e - pip install uv && SETUPTOOLS_SCM_PRETEND_VERSION_FOR_OUMI=0.0.0.dev0 uv pip install --system -e '.[gpu]' hf_transfer + pip install uv && uv pip install --system oumi[gpu] hf_transfer run: | set -e # Exit if any command failed. @@ -47,7 +41,7 @@ run: | set -x oumi distributed torchrun -m oumi train \ - -c configs/recipes/gemma4/sft/27b_lora/train.yaml \ + -c oumi://configs/recipes/gemma4/sft/27b_lora/train.yaml \ --training.run_name "${OUMI_RUN_NAME}.${SKYPILOT_TASK_ID}" echo "Node ${SKYPILOT_NODE_RANK} is all done!" diff --git a/configs/recipes/gemma4/sft/27b_lora/train.yaml b/configs/recipes/gemma4/sft/27b_lora/train.yaml index 208538bbe..ad90af664 100644 --- a/configs/recipes/gemma4/sft/27b_lora/train.yaml +++ b/configs/recipes/gemma4/sft/27b_lora/train.yaml @@ -7,11 +7,12 @@ # - LoRA is scoped to the text transformer only. The vision tower and the # multimodal projector are excluded via `lora_exclude_modules` below; they # share projection-like names with the text model but PEFT cannot adapt them. -# The standard projection targets suffix-match the expert MLPs; the MoE -# router ("gate") is left untouched. +# NOTE: on this MoE the standard gate_proj/up_proj/down_proj names do not +# match the (fused) expert MLP modules, so LoRA adapts only the attention +# projections (~9.3M params). Adapting the experts needs their module names. # # Requirements: -# - transformers >= 5.10.0 +# - transformers >= 5.5.4 # - peft (installed automatically with oumi) # - apache-2.0 license, no gating: https://huggingface.co/google/gemma-4-26B-A4B-it # - Log into WandB (`wandb login`) or disable `enable_wandb` @@ -44,6 +45,10 @@ training: save_final_model: true num_train_epochs: 1 per_device_train_batch_size: 1 + # NOTE: gradient accumulation inflates reported loss for Gemma 4 (~4x) due + # to a missing `accepts_loss_kwargs = False` on Gemma4ForConditionalGeneration. + # Fixed on transformers main but not yet released as of 5.5.4. See + # huggingface/transformers#40564 (same bug existed in Gemma 3). gradient_accumulation_steps: 8 max_grad_norm: 1.0 From 7db829d7f05880a037b4302f12e82f01a3802de0 Mon Sep 17 00:00:00 2001 From: aniruddh-alt Date: Fri, 5 Jun 2026 14:06:20 -0700 Subject: [PATCH 3/4] fix(gemma4): accurate LoRA exclude docs + drop inert ddp flag (27B) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit README LoRA prose claimed the recipes exclude .*audio_tower.*, but the Larger image+text models (31B/27B) have no audio tower and exclude .*multi_modal_projector.* — generalize the prose to cover both families. Remove ddp_find_unused_parameters from 27b_lora/train.yaml: it is a no-op under FSDP (which this recipe always enables; distributed.py routes the flag only to the DDP wrapper) and its comment was misleading. Reword the header exclusion rationale to match the e4b sibling (Gemma4ClippableLinear). --- configs/recipes/gemma4/README.md | 13 +++++++------ configs/recipes/gemma4/sft/27b_lora/train.yaml | 6 +++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/configs/recipes/gemma4/README.md b/configs/recipes/gemma4/README.md index 3457edcd6..6a7c7a49f 100644 --- a/configs/recipes/gemma4/README.md +++ b/configs/recipes/gemma4/README.md @@ -41,12 +41,13 @@ oumi launch up -c oumi://configs/recipes/gemma4/sft/e4b_full/gcp_job.yaml --clus ### LoRA Training -LoRA is scoped to the language-model layers only. Gemma 4's vision/audio towers -use `Gemma4ClippableLinear` wrappers that PEFT cannot adapt, and they share -projection names (`q_proj`, `v_proj`, ...) with the text model. The recipes target -the plain projection names and set `lora_exclude_modules: [".*vision_tower.*", -".*audio_tower.*"]`, which oumi passes to PEFT's `exclude_modules` to keep LoRA off -the towers. +LoRA is scoped to the language-model layers only. Gemma 4's non-text towers use +`Gemma4ClippableLinear` wrappers that PEFT cannot adapt, and they share projection +names (`q_proj`, `v_proj`, ...) with the text model. The recipes target the plain +projection names and set `lora_exclude_modules` to keep LoRA off the towers: the +Efficient (text+image+audio) models exclude `[".*vision_tower.*", ".*audio_tower.*"]`, +and the Larger (image+text) models exclude `[".*vision_tower.*", ".*multi_modal_projector.*"]`. +oumi passes this list to PEFT's `exclude_modules`. To launch Gemma 4 E4B LoRA training locally (fits a single A100/H100): diff --git a/configs/recipes/gemma4/sft/27b_lora/train.yaml b/configs/recipes/gemma4/sft/27b_lora/train.yaml index ad90af664..b8e83b0f1 100644 --- a/configs/recipes/gemma4/sft/27b_lora/train.yaml +++ b/configs/recipes/gemma4/sft/27b_lora/train.yaml @@ -5,8 +5,9 @@ # from Google (Gemma 4 Larger series) # - 256K context length; image + text inputs # - LoRA is scoped to the text transformer only. The vision tower and the -# multimodal projector are excluded via `lora_exclude_modules` below; they -# share projection-like names with the text model but PEFT cannot adapt them. +# multimodal projector use `Gemma4ClippableLinear` wrappers that PEFT cannot +# adapt, and share projection names with the text model — so they are excluded +# via `lora_exclude_modules` below. # NOTE: on this MoE the standard gate_proj/up_proj/down_proj names do not # match the (fused) expert MLP modules, so LoRA adapts only the attention # projections (~9.3M params). Adapting the experts needs their module names. @@ -55,7 +56,6 @@ training: enable_gradient_checkpointing: true gradient_checkpointing_kwargs: use_reentrant: false - ddp_find_unused_parameters: true # LoRA leaves the vision tower frozen. compile: false optimizer: "adamw_torch_fused" From bb809941ebb0eb011aff9db66785e3b5fc9d9b5d Mon Sep 17 00:00:00 2001 From: aniruddh-alt Date: Fri, 5 Jun 2026 15:21:41 -0700 Subject: [PATCH 4/4] fix(gemma4): correct 27B LoRA license note (Gemma ToU, gated) Gemma 4 is under the Gemma Terms of Use and gated on HF, not apache-2.0/ungated. Match the rest of the repo's wording. Same liberate-bot fix as the sibling 31B PR. --- configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml | 2 +- configs/recipes/gemma4/sft/27b_lora/train.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml b/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml index fe38f0aea..6d924b450 100644 --- a/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml +++ b/configs/recipes/gemma4/sft/27b_lora/gcp_job.yaml @@ -2,7 +2,7 @@ # # Requirements: # - Set up SkyPilot GCP: https://oumi.ai/docs/en/latest/user_guides/launch/launch.html#setup -# - apache-2.0 license, no gating: https://huggingface.co/google/gemma-4-26B-A4B-it +# - Gemma license acceptance required: https://huggingface.co/google/gemma-4-26B-A4B-it # - Log into WandB (`wandb login`) or disable `enable_wandb` # # Usage: diff --git a/configs/recipes/gemma4/sft/27b_lora/train.yaml b/configs/recipes/gemma4/sft/27b_lora/train.yaml index b8e83b0f1..ed572e510 100644 --- a/configs/recipes/gemma4/sft/27b_lora/train.yaml +++ b/configs/recipes/gemma4/sft/27b_lora/train.yaml @@ -15,7 +15,7 @@ # Requirements: # - transformers >= 5.5.4 # - peft (installed automatically with oumi) -# - apache-2.0 license, no gating: https://huggingface.co/google/gemma-4-26B-A4B-it +# - Gemma license acceptance required: https://huggingface.co/google/gemma-4-26B-A4B-it # - Log into WandB (`wandb login`) or disable `enable_wandb` # # Usage: