From c9c1a2fbd77569584d4e9b288fc772df222a8b86 Mon Sep 17 00:00:00 2001 From: Etherll Date: Fri, 19 Dec 2025 08:55:30 +0200 Subject: [PATCH] Change vllm save cells --- nb/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb | 16 ++++++++-------- nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb | 16 ++++++++-------- ...ot-Finetune_for_Reasoning_on_CodeForces.ipynb | 16 ++++++++-------- nb/CodeGemma_(7B)-Conversational.ipynb | 16 ++++++++-------- nb/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb | 16 ++++++++-------- nb/Deepseek_OCR_(3B)-Eval.ipynb | 4 ++-- nb/Deepseek_OCR_(3B)-Evaluation.ipynb | 4 ++-- nb/Deepseek_OCR_(3B).ipynb | 4 ++-- nb/ERNIE_4_5_21B_A3B_PT-Conversational.ipynb | 16 ++++++++-------- nb/ERNIE_4_5_VL_28B_A3B_PT_Vision.ipynb | 4 ++-- nb/Falcon_H1_(0.5B)-Alpaca.ipynb | 16 ++++++++-------- nb/FunctionGemma_(270M)-Mobile-Actions.ipynb | 16 ++++++++-------- nb/FunctionGemma_(270M).ipynb | 16 ++++++++-------- nb/Gemma2_(2B)-Alpaca.ipynb | 16 ++++++++-------- nb/Gemma2_(9B)-Alpaca.ipynb | 16 ++++++++-------- nb/Gemma3N_(4B)-Audio.ipynb | 2 +- nb/Gemma3N_(4B)-Conversational.ipynb | 2 +- nb/Gemma3N_(4B)-Vision.ipynb | 4 ++-- nb/Gemma3_(1B)-GRPO.ipynb | 2 +- nb/Gemma3_(270M).ipynb | 16 ++++++++-------- nb/Gemma3_(27B)_A100-Conversational.ipynb | 2 +- nb/Gemma3_(4B)-Vision-GRPO.ipynb | 4 ++-- nb/Gemma3_(4B)-Vision.ipynb | 4 ++-- nb/Gemma3_(4B).ipynb | 2 +- nb/Granite4.0.ipynb | 16 ++++++++-------- nb/Granite4.0_350M.ipynb | 16 ++++++++-------- ...Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb | 16 ++++++++-------- ...Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb | 16 ++++++++-------- ...Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb | 16 ++++++++-------- nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb | 2 +- ...gingFace Course-Gemma3_(4B)-Vision-GRPO.ipynb | 4 ++-- nb/HuggingFace Course-Llama3.1_(8B)-GRPO.ipynb | 16 ++++++++-------- nb/HuggingFace Course-Llama_FP8_GRPO.ipynb | 16 ++++++++-------- ...ggingFace Course-Mistral_v0.3_(7B)-GRPO.ipynb | 16 ++++++++-------- nb/HuggingFace 
Course-Phi_4_(14B)-GRPO.ipynb | 16 ++++++++-------- nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb | 16 ++++++++-------- nb/HuggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb | 16 ++++++++-------- nb/HuggingFace Course-Qwen3_(4B)-GRPO.ipynb | 16 ++++++++-------- nb/HuggingFace Course-Qwen3_8B_FP8_GRPO.ipynb | 16 ++++++++-------- ...ngFace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb | 16 ++++++++-------- nb/HuggingFace Course-gpt-oss-(20B)-GRPO.ipynb | 8 ++++---- ...gingFace Course-gpt-oss-(20B)_A100-GRPO.ipynb | 14 +++++++------- ...gingFace Course-gpt_oss_(20B)_GRPO_BF16.ipynb | 8 ++++---- nb/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb | 16 ++++++++-------- nb/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb | 16 ++++++++-------- ...ot-Finetune_for_Reasoning_on_CodeForces.ipynb | 16 ++++++++-------- nb/Kaggle-CodeGemma_(7B)-Conversational.ipynb | 16 ++++++++-------- nb/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb | 16 ++++++++-------- nb/Kaggle-Deepseek_OCR_(3B)-Eval.ipynb | 4 ++-- nb/Kaggle-Deepseek_OCR_(3B)-Evaluation.ipynb | 4 ++-- nb/Kaggle-Deepseek_OCR_(3B).ipynb | 4 ++-- ...gle-ERNIE_4_5_21B_A3B_PT-Conversational.ipynb | 16 ++++++++-------- nb/Kaggle-ERNIE_4_5_VL_28B_A3B_PT_Vision.ipynb | 4 ++-- nb/Kaggle-Falcon_H1_(0.5B)-Alpaca.ipynb | 16 ++++++++-------- nb/Kaggle-Gemma2_(2B)-Alpaca.ipynb | 16 ++++++++-------- nb/Kaggle-Gemma2_(9B)-Alpaca.ipynb | 16 ++++++++-------- nb/Kaggle-Gemma3N_(4B)-Audio.ipynb | 2 +- nb/Kaggle-Gemma3N_(4B)-Conversational.ipynb | 2 +- nb/Kaggle-Gemma3N_(4B)-Vision.ipynb | 4 ++-- nb/Kaggle-Gemma3_(1B)-GRPO.ipynb | 2 +- nb/Kaggle-Gemma3_(270M).ipynb | 16 ++++++++-------- nb/Kaggle-Gemma3_(27B)_A100-Conversational.ipynb | 2 +- nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb | 4 ++-- nb/Kaggle-Gemma3_(4B)-Vision.ipynb | 4 ++-- nb/Kaggle-Gemma3_(4B).ipynb | 2 +- nb/Kaggle-Granite4.0.ipynb | 16 ++++++++-------- nb/Kaggle-Granite4.0_350M.ipynb | 16 ++++++++-------- ...aggle-Liquid_LFM2_(1.2B)-Conversational.ipynb | 16 ++++++++-------- 
nb/Kaggle-Llama3.1_(8B)-Alpaca.ipynb | 16 ++++++++-------- nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb | 16 ++++++++-------- nb/Kaggle-Llama3.2_(11B)-Vision.ipynb | 4 ++-- nb/Kaggle-Llama3.2_(1B)-RAFT.ipynb | 16 ++++++++-------- ...gle-Llama3.2_(1B_and_3B)-Conversational.ipynb | 16 ++++++++-------- ...ggle-Llama3.3_(70B)_A100-Conversational.ipynb | 16 ++++++++-------- nb/Kaggle-Llama3_(8B)-Alpaca.ipynb | 16 ++++++++-------- nb/Kaggle-Llama3_(8B)-Conversational.ipynb | 16 ++++++++-------- nb/Kaggle-Llama3_(8B)-ORPO.ipynb | 16 ++++++++-------- nb/Kaggle-Llama_FP8_GRPO.ipynb | 16 ++++++++-------- nb/Kaggle-Llasa_TTS_(1B).ipynb | 16 ++++++++-------- nb/Kaggle-Llasa_TTS_(3B).ipynb | 16 ++++++++-------- ...agistral_(24B)-Reasoning-Conversational.ipynb | 16 ++++++++-------- ...aggle-Meta-Synthetic-Data-Llama3.1_(8B).ipynb | 16 ++++++++-------- ...aggle-Meta_Synthetic_Data_Llama3_2_(3B).ipynb | 16 ++++++++-------- ...(3B)_Reinforcement_Learning_Sudoku_Game.ipynb | 16 ++++++++-------- nb/Kaggle-Ministral_3_VL_(3B)_Vision.ipynb | 4 ++-- nb/Kaggle-Mistral_Nemo_(12B)-Alpaca.ipynb | 16 ++++++++-------- nb/Kaggle-Mistral_Small_(22B)-Alpaca.ipynb | 16 ++++++++-------- nb/Kaggle-Mistral_v0.3_(7B)-Alpaca.ipynb | 16 ++++++++-------- nb/Kaggle-Mistral_v0.3_(7B)-CPT.ipynb | 16 ++++++++-------- nb/Kaggle-Mistral_v0.3_(7B)-Conversational.ipynb | 16 ++++++++-------- nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb | 16 ++++++++-------- nb/Kaggle-Nemotron-3-Nano-30B-A3B_A100.ipynb | 16 ++++++++-------- nb/Kaggle-Nemotron-Nano-3-30B-A3B_A100.ipynb | 16 ++++++++-------- nb/Kaggle-Orpheus_(3B)-TTS.ipynb | 16 ++++++++-------- nb/Kaggle-Oute_TTS_(1B).ipynb | 16 ++++++++-------- nb/Kaggle-Paddle_OCR_(1B)_Vision.ipynb | 4 ++-- nb/Kaggle-Phi_3.5_Mini-Conversational.ipynb | 16 ++++++++-------- nb/Kaggle-Phi_3_Medium-Conversational.ipynb | 16 ++++++++-------- nb/Kaggle-Phi_4-Conversational.ipynb | 16 ++++++++-------- nb/Kaggle-Phi_4_(14B)-GRPO.ipynb | 16 ++++++++-------- nb/Kaggle-Pixtral_(12B)-Vision.ipynb | 4 ++-- 
nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb | 16 ++++++++-------- nb/Kaggle-Qwen2.5_(7B)-Alpaca.ipynb | 16 ++++++++-------- ...ggle-Qwen2.5_Coder_(14B)-Conversational.ipynb | 16 ++++++++-------- nb/Kaggle-Qwen2.5_VL_(7B)-Vision.ipynb | 4 ++-- nb/Kaggle-Qwen2_(7B)-Alpaca.ipynb | 16 ++++++++-------- nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb | 16 ++++++++-------- nb/Kaggle-Qwen2_VL_(7B)-Vision.ipynb | 4 ++-- nb/Kaggle-Qwen3_(14B)-Alpaca.ipynb | 16 ++++++++-------- ...le-Qwen3_(14B)-Reasoning-Conversational.ipynb | 16 ++++++++-------- nb/Kaggle-Qwen3_(14B).ipynb | 16 ++++++++-------- ...en3_(32B)_A100-Reasoning-Conversational.ipynb | 16 ++++++++-------- nb/Kaggle-Qwen3_(4B)-GRPO.ipynb | 16 ++++++++-------- nb/Kaggle-Qwen3_(4B)-Instruct.ipynb | 16 ++++++++-------- nb/Kaggle-Qwen3_(4B)-Thinking.ipynb | 16 ++++++++-------- nb/Kaggle-Qwen3_8B_FP8_GRPO.ipynb | 16 ++++++++-------- nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb | 16 ++++++++-------- nb/Kaggle-Qwen3_VL_(8B)-Vision.ipynb | 4 ++-- nb/Kaggle-Sesame_CSM_(1B)-TTS.ipynb | 12 ++++++------ nb/Kaggle-Spark_TTS_(0_5B).ipynb | 16 ++++++++-------- nb/Kaggle-TinyLlama_(1.1B)-Alpaca.ipynb | 16 ++++++++-------- nb/Kaggle-Whisper.ipynb | 16 ++++++++-------- nb/Kaggle-gpt-oss-(120B)_A100-Fine-tuning.ipynb | 14 +++++++------- nb/Kaggle-gpt-oss-(20B)-Fine-tuning.ipynb | 8 ++++---- nb/Kaggle-gpt-oss-(20B)-GRPO.ipynb | 8 ++++---- nb/Kaggle-gpt-oss-(20B)_A100-GRPO.ipynb | 14 +++++++------- ...-gpt_oss_(20B)_500K_Context_Fine_tuning.ipynb | 8 ++++---- nb/Kaggle-gpt_oss_(20B)_GRPO_BF16.ipynb | 8 ++++---- nb/Liquid_LFM2-Conversational.ipynb | 16 ++++++++-------- nb/Liquid_LFM2_(1.2B)-Conversational.ipynb | 16 ++++++++-------- nb/Llama3.1_(8B)-Alpaca.ipynb | 16 ++++++++-------- nb/Llama3.1_(8B)-GRPO.ipynb | 16 ++++++++-------- nb/Llama3.2_(11B)-Vision.ipynb | 4 ++-- nb/Llama3.2_(1B)-RAFT.ipynb | 16 ++++++++-------- nb/Llama3.2_(1B_and_3B)-Conversational.ipynb | 16 ++++++++-------- nb/Llama3.3_(70B)_A100-Conversational.ipynb | 16 ++++++++-------- 
nb/Llama3_(8B)-Alpaca.ipynb | 16 ++++++++-------- nb/Llama3_(8B)-Conversational.ipynb | 16 ++++++++-------- nb/Llama3_(8B)-ORPO.ipynb | 16 ++++++++-------- nb/Llama_FP8_GRPO.ipynb | 16 ++++++++-------- nb/Llasa_TTS_(1B).ipynb | 16 ++++++++-------- nb/Llasa_TTS_(3B).ipynb | 16 ++++++++-------- ...agistral_(24B)-Reasoning-Conversational.ipynb | 16 ++++++++-------- nb/Meta-Synthetic-Data-Llama3.1_(8B).ipynb | 16 ++++++++-------- nb/Meta_Synthetic_Data_Llama3_2_(3B).ipynb | 16 ++++++++-------- ...(3B)_Reinforcement_Learning_Sudoku_Game.ipynb | 16 ++++++++-------- nb/Ministral_3_VL_(3B)_Vision.ipynb | 4 ++-- nb/Mistral_Nemo_(12B)-Alpaca.ipynb | 16 ++++++++-------- nb/Mistral_Small_(22B)-Alpaca.ipynb | 16 ++++++++-------- nb/Mistral_v0.3_(7B)-Alpaca.ipynb | 16 ++++++++-------- nb/Mistral_v0.3_(7B)-CPT.ipynb | 16 ++++++++-------- nb/Mistral_v0.3_(7B)-Conversational.ipynb | 16 ++++++++-------- nb/Mistral_v0.3_(7B)-GRPO.ipynb | 16 ++++++++-------- nb/Nemotron-3-Nano-30B-A3B_A100.ipynb | 16 ++++++++-------- nb/Nemotron-Nano-3-30B-A3B_A100.ipynb | 16 ++++++++-------- ..._(20B)_Reinforcement_Learning_2048_Game.ipynb | 8 ++++---- ...)_Reinforcement_Learning_2048_Game_BF16.ipynb | 8 ++++---- nb/Orpheus_(3B)-TTS.ipynb | 16 ++++++++-------- nb/Oute_TTS_(1B).ipynb | 16 ++++++++-------- nb/Paddle_OCR_(1B)_Vision.ipynb | 4 ++-- nb/Phi_3.5_Mini-Conversational.ipynb | 16 ++++++++-------- nb/Phi_3_Medium-Conversational.ipynb | 16 ++++++++-------- nb/Phi_4-Conversational.ipynb | 16 ++++++++-------- nb/Phi_4_(14B)-GRPO.ipynb | 16 ++++++++-------- nb/Pixtral_(12B)-Vision.ipynb | 4 ++-- nb/Qwen2.5_(3B)-GRPO.ipynb | 16 ++++++++-------- nb/Qwen2.5_(7B)-Alpaca.ipynb | 16 ++++++++-------- nb/Qwen2.5_Coder_(14B)-Conversational.ipynb | 16 ++++++++-------- nb/Qwen2.5_VL_(7B)-Vision.ipynb | 4 ++-- nb/Qwen2_(7B)-Alpaca.ipynb | 16 ++++++++-------- nb/Qwen2_5_7B_VL_GRPO.ipynb | 16 ++++++++-------- nb/Qwen2_VL_(7B)-Vision.ipynb | 4 ++-- nb/Qwen3_(14B)-Alpaca.ipynb | 16 ++++++++-------- 
nb/Qwen3_(14B)-Reasoning-Conversational.ipynb | 16 ++++++++-------- nb/Qwen3_(14B).ipynb | 16 ++++++++-------- ...en3_(32B)_A100-Reasoning-Conversational.ipynb | 16 ++++++++-------- nb/Qwen3_(4B)-GRPO.ipynb | 16 ++++++++-------- nb/Qwen3_(4B)-Instruct.ipynb | 16 ++++++++-------- nb/Qwen3_(4B)-Thinking.ipynb | 16 ++++++++-------- nb/Qwen3_8B_FP8_GRPO.ipynb | 16 ++++++++-------- nb/Qwen3_VL_(8B)-Vision-GRPO.ipynb | 16 ++++++++-------- nb/Qwen3_VL_(8B)-Vision.ipynb | 4 ++-- nb/Sesame_CSM_(1B)-TTS.ipynb | 12 ++++++------ nb/Spark_TTS_(0_5B).ipynb | 16 ++++++++-------- nb/TinyLlama_(1.1B)-Alpaca.ipynb | 16 ++++++++-------- nb/Whisper.ipynb | 16 ++++++++-------- nb/gpt-oss-(120B)_A100-Fine-tuning.ipynb | 14 +++++++------- nb/gpt-oss-(20B)-Fine-tuning.ipynb | 8 ++++---- nb/gpt-oss-(20B)-GRPO.ipynb | 8 ++++---- nb/gpt-oss-(20B)_A100-GRPO.ipynb | 14 +++++++------- nb/gpt_oss_(20B)_500K_Context_Fine_tuning.ipynb | 8 ++++---- nb/gpt_oss_(20B)_GRPO_BF16.ipynb | 8 ++++---- ..._(20B)_Reinforcement_Learning_2048_Game.ipynb | 8 ++++---- ...)_Reinforcement_Learning_2048_Game_BF16.ipynb | 8 ++++---- ...nforcement_Learning_2048_Game_DGX_Spark.ipynb | 8 ++++---- nb/nemo_gym_sudoku.ipynb | 8 ++++---- .../Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb | 16 ++++++++-------- .../Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb | 16 ++++++++-------- ...ot-Finetune_for_Reasoning_on_CodeForces.ipynb | 16 ++++++++-------- .../CodeGemma_(7B)-Conversational.ipynb | 16 ++++++++-------- .../DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb | 16 ++++++++-------- original_template/Deepseek_OCR_(3B)-Eval.ipynb | 4 ++-- .../Deepseek_OCR_(3B)-Evaluation.ipynb | 4 ++-- original_template/Deepseek_OCR_(3B).ipynb | 4 ++-- .../ERNIE_4_5_21B_A3B_PT-Conversational.ipynb | 16 ++++++++-------- .../ERNIE_4_5_VL_28B_A3B_PT_Vision.ipynb | 4 ++-- original_template/Falcon_H1_(0.5B)-Alpaca.ipynb | 16 ++++++++-------- original_template/Gemma2_(2B)-Alpaca.ipynb | 16 ++++++++-------- original_template/Gemma2_(9B)-Alpaca.ipynb | 16 ++++++++-------- 
original_template/Gemma3N_(4B)-Audio.ipynb | 2 +- .../Gemma3N_(4B)-Conversational.ipynb | 2 +- original_template/Gemma3N_(4B)-Vision.ipynb | 4 ++-- original_template/Gemma3_(1B)-GRPO.ipynb | 2 +- original_template/Gemma3_(270M).ipynb | 16 ++++++++-------- .../Gemma3_(27B)_A100-Conversational.ipynb | 2 +- original_template/Gemma3_(4B)-Vision-GRPO.ipynb | 4 ++-- original_template/Gemma3_(4B)-Vision.ipynb | 4 ++-- original_template/Gemma3_(4B).ipynb | 2 +- original_template/Granite4.0.ipynb | 16 ++++++++-------- original_template/Granite4.0_350M.ipynb | 16 ++++++++-------- .../Liquid_LFM2_(1.2B)-Conversational.ipynb | 16 ++++++++-------- original_template/Llama3.1_(8B)-Alpaca.ipynb | 16 ++++++++-------- original_template/Llama3.1_(8B)-GRPO.ipynb | 16 ++++++++-------- original_template/Llama3.2_(11B)-Vision.ipynb | 4 ++-- original_template/Llama3.2_(1B)-RAFT.ipynb | 16 ++++++++-------- .../Llama3.2_(1B_and_3B)-Conversational.ipynb | 16 ++++++++-------- .../Llama3.3_(70B)_A100-Conversational.ipynb | 16 ++++++++-------- original_template/Llama3_(8B)-Alpaca.ipynb | 16 ++++++++-------- .../Llama3_(8B)-Conversational.ipynb | 16 ++++++++-------- original_template/Llama3_(8B)-ORPO.ipynb | 16 ++++++++-------- original_template/Llama_FP8_GRPO.ipynb | 16 ++++++++-------- original_template/Llasa_TTS_(1B).ipynb | 16 ++++++++-------- original_template/Llasa_TTS_(3B).ipynb | 16 ++++++++-------- ...agistral_(24B)-Reasoning-Conversational.ipynb | 16 ++++++++-------- .../Meta-Synthetic-Data-Llama3.1_(8B).ipynb | 16 ++++++++-------- .../Meta_Synthetic_Data_Llama3_2_(3B).ipynb | 16 ++++++++-------- ...(3B)_Reinforcement_Learning_Sudoku_Game.ipynb | 16 ++++++++-------- .../Ministral_3_VL_(3B)_Vision.ipynb | 4 ++-- .../Mistral_Nemo_(12B)-Alpaca.ipynb | 16 ++++++++-------- .../Mistral_Small_(22B)-Alpaca.ipynb | 16 ++++++++-------- original_template/Mistral_v0.3_(7B)-Alpaca.ipynb | 16 ++++++++-------- original_template/Mistral_v0.3_(7B)-CPT.ipynb | 16 ++++++++-------- 
.../Mistral_v0.3_(7B)-Conversational.ipynb | 16 ++++++++-------- original_template/Mistral_v0.3_(7B)-GRPO.ipynb | 16 ++++++++-------- .../Nemotron-3-Nano-30B-A3B_A100.ipynb | 16 ++++++++-------- .../Nemotron-Nano-3-30B-A3B_A100.ipynb | 16 ++++++++-------- original_template/Orpheus_(3B)-TTS.ipynb | 16 ++++++++-------- original_template/Oute_TTS_(1B).ipynb | 16 ++++++++-------- original_template/Paddle_OCR_(1B)_Vision.ipynb | 4 ++-- .../Phi_3.5_Mini-Conversational.ipynb | 16 ++++++++-------- .../Phi_3_Medium-Conversational.ipynb | 16 ++++++++-------- original_template/Phi_4-Conversational.ipynb | 16 ++++++++-------- original_template/Phi_4_(14B)-GRPO.ipynb | 16 ++++++++-------- original_template/Pixtral_(12B)-Vision.ipynb | 4 ++-- original_template/Qwen2.5_(3B)-GRPO.ipynb | 16 ++++++++-------- original_template/Qwen2.5_(7B)-Alpaca.ipynb | 16 ++++++++-------- .../Qwen2.5_Coder_(14B)-Conversational.ipynb | 16 ++++++++-------- original_template/Qwen2.5_VL_(7B)-Vision.ipynb | 4 ++-- original_template/Qwen2_(7B)-Alpaca.ipynb | 16 ++++++++-------- original_template/Qwen2_5_7B_VL_GRPO.ipynb | 16 ++++++++-------- original_template/Qwen2_VL_(7B)-Vision.ipynb | 4 ++-- original_template/Qwen3_(14B)-Alpaca.ipynb | 16 ++++++++-------- .../Qwen3_(14B)-Reasoning-Conversational.ipynb | 16 ++++++++-------- original_template/Qwen3_(14B).ipynb | 16 ++++++++-------- ...en3_(32B)_A100-Reasoning-Conversational.ipynb | 16 ++++++++-------- original_template/Qwen3_(4B)-GRPO.ipynb | 16 ++++++++-------- original_template/Qwen3_(4B)-Instruct.ipynb | 16 ++++++++-------- original_template/Qwen3_(4B)-Thinking.ipynb | 16 ++++++++-------- original_template/Qwen3_8B_FP8_GRPO.ipynb | 16 ++++++++-------- .../Qwen3_VL_(8B)-Vision-GRPO.ipynb | 16 ++++++++-------- original_template/Qwen3_VL_(8B)-Vision.ipynb | 4 ++-- original_template/Sesame_CSM_(1B)-TTS.ipynb | 12 ++++++------ original_template/Spark_TTS_(0_5B).ipynb | 16 ++++++++-------- original_template/TinyLlama_(1.1B)-Alpaca.ipynb | 16 
++++++++-------- original_template/Whisper.ipynb | 16 ++++++++-------- .../gpt-oss-(120B)_A100-Fine-tuning.ipynb | 14 +++++++------- .../gpt-oss-(20B)-Fine-tuning.ipynb | 8 ++++---- original_template/gpt-oss-(20B)-GRPO.ipynb | 8 ++++---- original_template/gpt-oss-(20B)_A100-GRPO.ipynb | 14 +++++++------- .../gpt_oss_(20B)_500K_Context_Fine_tuning.ipynb | 8 ++++---- original_template/gpt_oss_(20B)_GRPO_BF16.ipynb | 8 ++++---- .../Advanced_Llama3_1_(3B)_GRPO_LoRA.py | 16 ++++++++-------- .../Advanced_Llama3_2_(3B)_GRPO_LoRA.py | 16 ++++++++-------- ...s-cot-Finetune_for_Reasoning_on_CodeForces.py | 16 ++++++++-------- python_scripts/CodeGemma_(7B)-Conversational.py | 16 ++++++++-------- .../DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py | 16 ++++++++-------- python_scripts/Deepseek_OCR_(3B)-Eval.py | 4 ++-- python_scripts/Deepseek_OCR_(3B)-Evaluation.py | 4 ++-- python_scripts/Deepseek_OCR_(3B).py | 4 ++-- .../ERNIE_4_5_21B_A3B_PT-Conversational.py | 16 ++++++++-------- python_scripts/ERNIE_4_5_VL_28B_A3B_PT_Vision.py | 4 ++-- python_scripts/Falcon_H1_(0.5B)-Alpaca.py | 16 ++++++++-------- python_scripts/Gemma2_(2B)-Alpaca.py | 16 ++++++++-------- python_scripts/Gemma2_(9B)-Alpaca.py | 16 ++++++++-------- python_scripts/Gemma3N_(4B)-Audio.py | 2 +- python_scripts/Gemma3N_(4B)-Conversational.py | 2 +- python_scripts/Gemma3N_(4B)-Vision.py | 4 ++-- python_scripts/Gemma3_(1B)-GRPO.py | 2 +- python_scripts/Gemma3_(270M).py | 16 ++++++++-------- .../Gemma3_(27B)_A100-Conversational.py | 2 +- python_scripts/Gemma3_(4B)-Vision-GRPO.py | 4 ++-- python_scripts/Gemma3_(4B)-Vision.py | 4 ++-- python_scripts/Gemma3_(4B).py | 2 +- python_scripts/Granite4.0.py | 16 ++++++++-------- python_scripts/Granite4.0_350M.py | 16 ++++++++-------- ...ce Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py | 16 ++++++++-------- ...ce Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py | 16 ++++++++-------- ...ce Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py | 16 ++++++++-------- .../HuggingFace Course-Gemma3_(1B)-GRPO.py | 2 
+- ...HuggingFace Course-Gemma3_(4B)-Vision-GRPO.py | 4 ++-- .../HuggingFace Course-Llama3.1_(8B)-GRPO.py | 16 ++++++++-------- .../HuggingFace Course-Llama_FP8_GRPO.py | 16 ++++++++-------- .../HuggingFace Course-Mistral_v0.3_(7B)-GRPO.py | 16 ++++++++-------- .../HuggingFace Course-Phi_4_(14B)-GRPO.py | 16 ++++++++-------- .../HuggingFace Course-Qwen2.5_(3B)-GRPO.py | 16 ++++++++-------- .../HuggingFace Course-Qwen2_5_7B_VL_GRPO.py | 16 ++++++++-------- .../HuggingFace Course-Qwen3_(4B)-GRPO.py | 16 ++++++++-------- .../HuggingFace Course-Qwen3_8B_FP8_GRPO.py | 16 ++++++++-------- ...ggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.py | 16 ++++++++-------- .../HuggingFace Course-gpt-oss-(20B)-GRPO.py | 8 ++++---- ...HuggingFace Course-gpt-oss-(20B)_A100-GRPO.py | 14 +++++++------- ...HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.py | 8 ++++---- .../Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py | 16 ++++++++-------- .../Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py | 16 ++++++++-------- ...s-cot-Finetune_for_Reasoning_on_CodeForces.py | 16 ++++++++-------- .../Kaggle-CodeGemma_(7B)-Conversational.py | 16 ++++++++-------- .../Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py | 16 ++++++++-------- python_scripts/Kaggle-Deepseek_OCR_(3B)-Eval.py | 4 ++-- .../Kaggle-Deepseek_OCR_(3B)-Evaluation.py | 4 ++-- python_scripts/Kaggle-Deepseek_OCR_(3B).py | 4 ++-- ...Kaggle-ERNIE_4_5_21B_A3B_PT-Conversational.py | 16 ++++++++-------- .../Kaggle-ERNIE_4_5_VL_28B_A3B_PT_Vision.py | 4 ++-- python_scripts/Kaggle-Falcon_H1_(0.5B)-Alpaca.py | 16 ++++++++-------- python_scripts/Kaggle-Gemma2_(2B)-Alpaca.py | 16 ++++++++-------- python_scripts/Kaggle-Gemma2_(9B)-Alpaca.py | 16 ++++++++-------- python_scripts/Kaggle-Gemma3N_(4B)-Audio.py | 2 +- .../Kaggle-Gemma3N_(4B)-Conversational.py | 2 +- python_scripts/Kaggle-Gemma3N_(4B)-Vision.py | 4 ++-- python_scripts/Kaggle-Gemma3_(1B)-GRPO.py | 2 +- python_scripts/Kaggle-Gemma3_(270M).py | 16 ++++++++-------- .../Kaggle-Gemma3_(27B)_A100-Conversational.py | 2 
+- python_scripts/Kaggle-Gemma3_(4B)-Vision-GRPO.py | 4 ++-- python_scripts/Kaggle-Gemma3_(4B)-Vision.py | 4 ++-- python_scripts/Kaggle-Gemma3_(4B).py | 2 +- python_scripts/Kaggle-Granite4.0.py | 16 ++++++++-------- python_scripts/Kaggle-Granite4.0_350M.py | 16 ++++++++-------- .../Kaggle-Liquid_LFM2_(1.2B)-Conversational.py | 16 ++++++++-------- python_scripts/Kaggle-Llama3.1_(8B)-Alpaca.py | 16 ++++++++-------- python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py | 16 ++++++++-------- python_scripts/Kaggle-Llama3.2_(11B)-Vision.py | 4 ++-- python_scripts/Kaggle-Llama3.2_(1B)-RAFT.py | 16 ++++++++-------- ...Kaggle-Llama3.2_(1B_and_3B)-Conversational.py | 16 ++++++++-------- .../Kaggle-Llama3.3_(70B)_A100-Conversational.py | 16 ++++++++-------- python_scripts/Kaggle-Llama3_(8B)-Alpaca.py | 16 ++++++++-------- .../Kaggle-Llama3_(8B)-Conversational.py | 16 ++++++++-------- python_scripts/Kaggle-Llama3_(8B)-ORPO.py | 16 ++++++++-------- python_scripts/Kaggle-Llama_FP8_GRPO.py | 16 ++++++++-------- python_scripts/Kaggle-Llasa_TTS_(1B).py | 16 ++++++++-------- python_scripts/Kaggle-Llasa_TTS_(3B).py | 16 ++++++++-------- ...e-Magistral_(24B)-Reasoning-Conversational.py | 16 ++++++++-------- .../Kaggle-Meta-Synthetic-Data-Llama3.1_(8B).py | 16 ++++++++-------- .../Kaggle-Meta_Synthetic_Data_Llama3_2_(3B).py | 16 ++++++++-------- ..._3_(3B)_Reinforcement_Learning_Sudoku_Game.py | 16 ++++++++-------- .../Kaggle-Ministral_3_VL_(3B)_Vision.py | 4 ++-- .../Kaggle-Mistral_Nemo_(12B)-Alpaca.py | 16 ++++++++-------- .../Kaggle-Mistral_Small_(22B)-Alpaca.py | 16 ++++++++-------- .../Kaggle-Mistral_v0.3_(7B)-Alpaca.py | 16 ++++++++-------- python_scripts/Kaggle-Mistral_v0.3_(7B)-CPT.py | 16 ++++++++-------- .../Kaggle-Mistral_v0.3_(7B)-Conversational.py | 16 ++++++++-------- python_scripts/Kaggle-Mistral_v0.3_(7B)-GRPO.py | 16 ++++++++-------- .../Kaggle-Nemotron-3-Nano-30B-A3B_A100.py | 16 ++++++++-------- .../Kaggle-Nemotron-Nano-3-30B-A3B_A100.py | 16 ++++++++-------- 
python_scripts/Kaggle-Orpheus_(3B)-TTS.py | 16 ++++++++-------- python_scripts/Kaggle-Oute_TTS_(1B).py | 16 ++++++++-------- python_scripts/Kaggle-Paddle_OCR_(1B)_Vision.py | 4 ++-- .../Kaggle-Phi_3.5_Mini-Conversational.py | 16 ++++++++-------- .../Kaggle-Phi_3_Medium-Conversational.py | 16 ++++++++-------- python_scripts/Kaggle-Phi_4-Conversational.py | 16 ++++++++-------- python_scripts/Kaggle-Phi_4_(14B)-GRPO.py | 16 ++++++++-------- python_scripts/Kaggle-Pixtral_(12B)-Vision.py | 4 ++-- python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py | 16 ++++++++-------- python_scripts/Kaggle-Qwen2.5_(7B)-Alpaca.py | 16 ++++++++-------- .../Kaggle-Qwen2.5_Coder_(14B)-Conversational.py | 16 ++++++++-------- python_scripts/Kaggle-Qwen2.5_VL_(7B)-Vision.py | 4 ++-- python_scripts/Kaggle-Qwen2_(7B)-Alpaca.py | 16 ++++++++-------- python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py | 16 ++++++++-------- python_scripts/Kaggle-Qwen2_VL_(7B)-Vision.py | 4 ++-- python_scripts/Kaggle-Qwen3_(14B)-Alpaca.py | 16 ++++++++-------- ...aggle-Qwen3_(14B)-Reasoning-Conversational.py | 16 ++++++++-------- python_scripts/Kaggle-Qwen3_(14B).py | 16 ++++++++-------- ...-Qwen3_(32B)_A100-Reasoning-Conversational.py | 16 ++++++++-------- python_scripts/Kaggle-Qwen3_(4B)-GRPO.py | 16 ++++++++-------- python_scripts/Kaggle-Qwen3_(4B)-Instruct.py | 16 ++++++++-------- python_scripts/Kaggle-Qwen3_(4B)-Thinking.py | 16 ++++++++-------- python_scripts/Kaggle-Qwen3_8B_FP8_GRPO.py | 16 ++++++++-------- .../Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py | 16 ++++++++-------- python_scripts/Kaggle-Qwen3_VL_(8B)-Vision.py | 4 ++-- python_scripts/Kaggle-Sesame_CSM_(1B)-TTS.py | 12 ++++++------ python_scripts/Kaggle-Spark_TTS_(0_5B).py | 16 ++++++++-------- python_scripts/Kaggle-TinyLlama_(1.1B)-Alpaca.py | 16 ++++++++-------- python_scripts/Kaggle-Whisper.py | 16 ++++++++-------- .../Kaggle-gpt-oss-(120B)_A100-Fine-tuning.py | 14 +++++++------- .../Kaggle-gpt-oss-(20B)-Fine-tuning.py | 8 ++++---- 
python_scripts/Kaggle-gpt-oss-(20B)-GRPO.py | 8 ++++---- python_scripts/Kaggle-gpt-oss-(20B)_A100-GRPO.py | 14 +++++++------- ...gle-gpt_oss_(20B)_500K_Context_Fine_tuning.py | 8 ++++---- python_scripts/Kaggle-gpt_oss_(20B)_GRPO_BF16.py | 8 ++++---- python_scripts/Liquid_LFM2-Conversational.py | 16 ++++++++-------- .../Liquid_LFM2_(1.2B)-Conversational.py | 16 ++++++++-------- python_scripts/Llama3.1_(8B)-Alpaca.py | 16 ++++++++-------- python_scripts/Llama3.1_(8B)-GRPO.py | 16 ++++++++-------- python_scripts/Llama3.2_(11B)-Vision.py | 4 ++-- python_scripts/Llama3.2_(1B)-RAFT.py | 16 ++++++++-------- .../Llama3.2_(1B_and_3B)-Conversational.py | 16 ++++++++-------- .../Llama3.3_(70B)_A100-Conversational.py | 16 ++++++++-------- python_scripts/Llama3_(8B)-Alpaca.py | 16 ++++++++-------- python_scripts/Llama3_(8B)-Conversational.py | 16 ++++++++-------- python_scripts/Llama3_(8B)-ORPO.py | 16 ++++++++-------- python_scripts/Llama_FP8_GRPO.py | 16 ++++++++-------- python_scripts/Llasa_TTS_(1B).py | 16 ++++++++-------- python_scripts/Llasa_TTS_(3B).py | 16 ++++++++-------- .../Magistral_(24B)-Reasoning-Conversational.py | 16 ++++++++-------- .../Meta-Synthetic-Data-Llama3.1_(8B).py | 16 ++++++++-------- .../Meta_Synthetic_Data_Llama3_2_(3B).py | 16 ++++++++-------- ..._3_(3B)_Reinforcement_Learning_Sudoku_Game.py | 16 ++++++++-------- python_scripts/Ministral_3_VL_(3B)_Vision.py | 4 ++-- python_scripts/Mistral_Nemo_(12B)-Alpaca.py | 16 ++++++++-------- python_scripts/Mistral_Small_(22B)-Alpaca.py | 16 ++++++++-------- python_scripts/Mistral_v0.3_(7B)-Alpaca.py | 16 ++++++++-------- python_scripts/Mistral_v0.3_(7B)-CPT.py | 16 ++++++++-------- .../Mistral_v0.3_(7B)-Conversational.py | 16 ++++++++-------- python_scripts/Mistral_v0.3_(7B)-GRPO.py | 16 ++++++++-------- python_scripts/Nemotron-3-Nano-30B-A3B_A100.py | 16 ++++++++-------- python_scripts/Nemotron-Nano-3-30B-A3B_A100.py | 16 ++++++++-------- ...oss_(20B)_Reinforcement_Learning_2048_Game.py | 8 ++++---- 
...20B)_Reinforcement_Learning_2048_Game_BF16.py | 8 ++++---- python_scripts/Orpheus_(3B)-TTS.py | 16 ++++++++-------- python_scripts/Oute_TTS_(1B).py | 16 ++++++++-------- python_scripts/Paddle_OCR_(1B)_Vision.py | 4 ++-- python_scripts/Phi_3.5_Mini-Conversational.py | 16 ++++++++-------- python_scripts/Phi_3_Medium-Conversational.py | 16 ++++++++-------- python_scripts/Phi_4-Conversational.py | 16 ++++++++-------- python_scripts/Phi_4_(14B)-GRPO.py | 16 ++++++++-------- python_scripts/Pixtral_(12B)-Vision.py | 4 ++-- python_scripts/Qwen2.5_(3B)-GRPO.py | 16 ++++++++-------- python_scripts/Qwen2.5_(7B)-Alpaca.py | 16 ++++++++-------- .../Qwen2.5_Coder_(14B)-Conversational.py | 16 ++++++++-------- python_scripts/Qwen2.5_VL_(7B)-Vision.py | 4 ++-- python_scripts/Qwen2_(7B)-Alpaca.py | 16 ++++++++-------- python_scripts/Qwen2_5_7B_VL_GRPO.py | 16 ++++++++-------- python_scripts/Qwen2_VL_(7B)-Vision.py | 4 ++-- python_scripts/Qwen3_(14B)-Alpaca.py | 16 ++++++++-------- .../Qwen3_(14B)-Reasoning-Conversational.py | 16 ++++++++-------- python_scripts/Qwen3_(14B).py | 16 ++++++++-------- .../Qwen3_(32B)_A100-Reasoning-Conversational.py | 16 ++++++++-------- python_scripts/Qwen3_(4B)-GRPO.py | 16 ++++++++-------- python_scripts/Qwen3_(4B)-Instruct.py | 16 ++++++++-------- python_scripts/Qwen3_(4B)-Thinking.py | 16 ++++++++-------- python_scripts/Qwen3_8B_FP8_GRPO.py | 16 ++++++++-------- python_scripts/Qwen3_VL_(8B)-Vision-GRPO.py | 16 ++++++++-------- python_scripts/Qwen3_VL_(8B)-Vision.py | 4 ++-- python_scripts/Sesame_CSM_(1B)-TTS.py | 12 ++++++------ python_scripts/Spark_TTS_(0_5B).py | 16 ++++++++-------- python_scripts/TinyLlama_(1.1B)-Alpaca.py | 16 ++++++++-------- python_scripts/Whisper.py | 16 ++++++++-------- .../gpt-oss-(120B)_A100-Fine-tuning.py | 14 +++++++------- python_scripts/gpt-oss-(20B)-Fine-tuning.py | 8 ++++---- python_scripts/gpt-oss-(20B)-GRPO.py | 8 ++++---- python_scripts/gpt-oss-(20B)_A100-GRPO.py | 14 +++++++------- 
.../gpt_oss_(20B)_500K_Context_Fine_tuning.py | 8 ++++---- python_scripts/gpt_oss_(20B)_GRPO_BF16.py | 8 ++++---- ...oss_(20B)_Reinforcement_Learning_2048_Game.py | 8 ++++---- ...20B)_Reinforcement_Learning_2048_Game_BF16.py | 8 ++++---- ...Reinforcement_Learning_2048_Game_DGX_Spark.py | 8 ++++---- 474 files changed, 3009 insertions(+), 3009 deletions(-) diff --git a/nb/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb b/nb/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb index 65d3a8da1..6a68c498d 100644 --- a/nb/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb +++ b/nb/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb @@ -9703,20 +9703,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " 
tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb b/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb index 84d290d72..6cc7e2025 100644 --- a/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb +++ b/nb/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb @@ -12647,20 +12647,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.ipynb b/nb/CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.ipynb index b8df3326e..852b2b1d3 100644 --- 
a/nb/CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.ipynb +++ b/nb/CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.ipynb @@ -1473,20 +1473,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/CodeGemma_(7B)-Conversational.ipynb b/nb/CodeGemma_(7B)-Conversational.ipynb index 308f71e87..bc962ecdb 100644 --- a/nb/CodeGemma_(7B)-Conversational.ipynb +++ b/nb/CodeGemma_(7B)-Conversational.ipynb @@ -824,20 +824,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if 
False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb b/nb/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb index 7ca7d92a5..ece268bff 100644 --- a/nb/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb +++ b/nb/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb @@ -5022,20 +5022,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method 
= \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Deepseek_OCR_(3B)-Eval.ipynb b/nb/Deepseek_OCR_(3B)-Eval.ipynb index fa3ca67e8..f64389adf 100644 --- a/nb/Deepseek_OCR_(3B)-Eval.ipynb +++ b/nb/Deepseek_OCR_(3B)-Eval.ipynb @@ -1647,10 +1647,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Deepseek_OCR_(3B)-Evaluation.ipynb b/nb/Deepseek_OCR_(3B)-Evaluation.ipynb index 9018e8f35..a895e3370 100644 --- a/nb/Deepseek_OCR_(3B)-Evaluation.ipynb +++ b/nb/Deepseek_OCR_(3B)-Evaluation.ipynb @@ -1961,10 +1961,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Deepseek_OCR_(3B).ipynb b/nb/Deepseek_OCR_(3B).ipynb index fa3ca67e8..f64389adf 100644 --- a/nb/Deepseek_OCR_(3B).ipynb +++ b/nb/Deepseek_OCR_(3B).ipynb @@ -1647,10 +1647,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/ERNIE_4_5_21B_A3B_PT-Conversational.ipynb b/nb/ERNIE_4_5_21B_A3B_PT-Conversational.ipynb index 6e37aaeff..2f999baf8 100644 --- a/nb/ERNIE_4_5_21B_A3B_PT-Conversational.ipynb +++ b/nb/ERNIE_4_5_21B_A3B_PT-Conversational.ipynb @@ -1692,23 +1692,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: 
# Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/ERNIE_4_5_VL_28B_A3B_PT_Vision.ipynb b/nb/ERNIE_4_5_VL_28B_A3B_PT_Vision.ipynb index 106b2c35a..5741c33e3 100644 --- a/nb/ERNIE_4_5_VL_28B_A3B_PT_Vision.ipynb +++ b/nb/ERNIE_4_5_VL_28B_A3B_PT_Vision.ipynb @@ -2089,10 +2089,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Falcon_H1_(0.5B)-Alpaca.ipynb b/nb/Falcon_H1_(0.5B)-Alpaca.ipynb index 6030f3eef..9c660d8e2 100644 --- a/nb/Falcon_H1_(0.5B)-Alpaca.ipynb +++ b/nb/Falcon_H1_(0.5B)-Alpaca.ipynb @@ -989,20 +989,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + 
"if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/FunctionGemma_(270M)-Mobile-Actions.ipynb b/nb/FunctionGemma_(270M)-Mobile-Actions.ipynb index 4946dde89..125d0b688 100644 --- a/nb/FunctionGemma_(270M)-Mobile-Actions.ipynb +++ b/nb/FunctionGemma_(270M)-Mobile-Actions.ipynb @@ -1689,23 +1689,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"functiongemma-finetune\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"functiongemma-finetune-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/functiongemma-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/functiongemma-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"functiongemma-finetune\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"functiongemma-finetune-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/functiongemma-finetune\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " 
model.push_to_hub_merged(\"hf/functiongemma-finetune-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"functiongemma-finetune\")\n", - " tokenizer.save_pretrained(\"functiongemma-finetune\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/functiongemma-finetune\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/functiongemma-finetune\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ], "metadata": { "id": "Ql1Nqa76y9af" diff --git a/nb/FunctionGemma_(270M).ipynb b/nb/FunctionGemma_(270M).ipynb index 49cfe795e..8177e7475 100644 --- a/nb/FunctionGemma_(270M).ipynb +++ b/nb/FunctionGemma_(270M).ipynb @@ -2227,23 +2227,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"functiongemma-finetune\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"functiongemma-finetune-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/functiongemma-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/functiongemma-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"functiongemma-finetune\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"functiongemma-finetune-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/functiongemma-finetune\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/functiongemma-finetune-merged-4bit\", tokenizer, 
save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"functiongemma-finetune\")\n", - " tokenizer.save_pretrained(\"functiongemma-finetune\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/functiongemma-finetune\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/functiongemma-finetune\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/nb/Gemma2_(2B)-Alpaca.ipynb b/nb/Gemma2_(2B)-Alpaca.ipynb index 99a31d3ee..8e962093e 100644 --- a/nb/Gemma2_(2B)-Alpaca.ipynb +++ b/nb/Gemma2_(2B)-Alpaca.ipynb @@ -1197,20 +1197,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " 
tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Gemma2_(9B)-Alpaca.ipynb b/nb/Gemma2_(9B)-Alpaca.ipynb index f3008dce0..301f90b6c 100644 --- a/nb/Gemma2_(9B)-Alpaca.ipynb +++ b/nb/Gemma2_(9B)-Alpaca.ipynb @@ -1224,20 +1224,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Gemma3N_(4B)-Audio.ipynb 
b/nb/Gemma3N_(4B)-Audio.ipynb index 42974d6f2..2ed5cbf8b 100644 --- a/nb/Gemma3N_(4B)-Audio.ipynb +++ b/nb/Gemma3N_(4B)-Audio.ipynb @@ -1566,7 +1566,7 @@ "outputs": [], "source": [ "if True: # Change to True to save finetune!\n", - " model.save_pretrained_merged(\"gemma-3n\", processor)" + " model.save_pretrained_merged(\"gemma-3n-merged\", processor)" ] }, { diff --git a/nb/Gemma3N_(4B)-Conversational.ipynb b/nb/Gemma3N_(4B)-Conversational.ipynb index 76dce4a6b..0c1307a33 100644 --- a/nb/Gemma3N_(4B)-Conversational.ipynb +++ b/nb/Gemma3N_(4B)-Conversational.ipynb @@ -1963,7 +1963,7 @@ "outputs": [], "source": [ "if False: # Change to True to save finetune!\n", - " model.save_pretrained_merged(\"gemma-3N-finetune\", tokenizer)" + " model.save_pretrained_merged(\"gemma-3N-finetune-merged\", tokenizer)" ] }, { diff --git a/nb/Gemma3N_(4B)-Vision.ipynb b/nb/Gemma3N_(4B)-Vision.ipynb index 516e4e7fd..4e1c75e44 100644 --- a/nb/Gemma3N_(4B)-Vision.ipynb +++ b/nb/Gemma3N_(4B)-Vision.ipynb @@ -1481,10 +1481,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", processor,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", processor,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", processor, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", processor, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Gemma3_(1B)-GRPO.ipynb b/nb/Gemma3_(1B)-GRPO.ipynb index 253636ded..6a5327ec0 100644 --- a/nb/Gemma3_(1B)-GRPO.ipynb +++ b/nb/Gemma3_(1B)-GRPO.ipynb @@ -1857,7 +1857,7 @@ "outputs": [], "source": [ "if False: # Change to True to save finetune!\n", - " model.save_pretrained_merged(\"gemma-3-finetune\", tokenizer)" + " model.save_pretrained_merged(\"gemma-3-finetune-merged\", tokenizer)" ] }, { diff --git a/nb/Gemma3_(270M).ipynb b/nb/Gemma3_(270M).ipynb index c95da321c..da849e296 100644 --- a/nb/Gemma3_(270M).ipynb +++ b/nb/Gemma3_(270M).ipynb @@ -1667,23 +1667,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"gemma-3-finetune\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"gemma-3-finetune-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gemma-3-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gemma-3-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"gemma-3-finetune\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"gemma-3-finetune-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gemma-3-finetune\", tokenizer, 
save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gemma-3-finetune-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"gemma-3-finetune\")\n", - " tokenizer.save_pretrained(\"gemma-3-finetune\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/gemma-3-finetune\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/gemma-3-finetune\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Gemma3_(27B)_A100-Conversational.ipynb b/nb/Gemma3_(27B)_A100-Conversational.ipynb index ead4f7779..0c56f1984 100644 --- a/nb/Gemma3_(27B)_A100-Conversational.ipynb +++ b/nb/Gemma3_(27B)_A100-Conversational.ipynb @@ -1008,7 +1008,7 @@ "outputs": [], "source": [ "if False: # Change to True to save finetune!\n", - " model.save_pretrained_merged(\"gemma-3-finetune\", tokenizer)" + " model.save_pretrained_merged(\"gemma-3-finetune-merged\", tokenizer)" ] }, { diff --git a/nb/Gemma3_(4B)-Vision-GRPO.ipynb b/nb/Gemma3_(4B)-Vision-GRPO.ipynb index 1fe2245be..f6a107349 100644 --- a/nb/Gemma3_(4B)-Vision-GRPO.ipynb +++ b/nb/Gemma3_(4B)-Vision-GRPO.ipynb @@ -1949,10 +1949,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Gemma3_(4B)-Vision.ipynb b/nb/Gemma3_(4B)-Vision.ipynb index 540b7a190..6e0c95305 100644 --- a/nb/Gemma3_(4B)-Vision.ipynb +++ b/nb/Gemma3_(4B)-Vision.ipynb @@ -1450,10 +1450,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", processor,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", processor,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", processor, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", processor, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Gemma3_(4B).ipynb b/nb/Gemma3_(4B).ipynb index af1bbe417..6cc011b54 100644 --- a/nb/Gemma3_(4B).ipynb +++ b/nb/Gemma3_(4B).ipynb @@ -1236,7 +1236,7 @@ "outputs": [], "source": [ "if False: # Change to True to save finetune!\n", - " model.save_pretrained_merged(\"gemma-3-finetune\", tokenizer)" + " model.save_pretrained_merged(\"gemma-3-finetune-merged\", tokenizer)" ] }, { diff --git a/nb/Granite4.0.ipynb b/nb/Granite4.0.ipynb index e412faa82..fcddef967 100644 --- a/nb/Granite4.0.ipynb +++ b/nb/Granite4.0.ipynb @@ -1439,23 +1439,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " 
model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Granite4.0_350M.ipynb b/nb/Granite4.0_350M.ipynb index aa35a754a..2d5dc9c1c 100644 --- a/nb/Granite4.0_350M.ipynb +++ b/nb/Granite4.0_350M.ipynb @@ -1606,23 +1606,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token 
= \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb b/nb/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb index d595a48e0..b48b4a03c 100644 --- a/nb/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb +++ b/nb/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb @@ -9679,20 +9679,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: 
model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb b/nb/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb index 84d290d72..6cc7e2025 100644 --- a/nb/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb +++ b/nb/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb @@ -12647,20 +12647,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if 
False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb b/nb/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb index c78374081..de6340f7b 100644 --- a/nb/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb +++ b/nb/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb @@ -5024,20 +5024,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " 
tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb b/nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb index 67371abfc..213bce84d 100644 --- a/nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb +++ b/nb/HuggingFace Course-Gemma3_(1B)-GRPO.ipynb @@ -1859,7 +1859,7 @@ "outputs": [], "source": [ "if False: # Change to True to save finetune!\n", - " model.save_pretrained_merged(\"gemma-3-finetune\", tokenizer)" + " model.save_pretrained_merged(\"gemma-3-finetune-merged\", tokenizer)" ] }, { diff --git a/nb/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.ipynb b/nb/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.ipynb index 2e71562cd..c8dbf96a9 100644 --- a/nb/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.ipynb +++ b/nb/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.ipynb @@ -1951,10 +1951,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/HuggingFace Course-Llama3.1_(8B)-GRPO.ipynb b/nb/HuggingFace Course-Llama3.1_(8B)-GRPO.ipynb index 2a8c61ac4..d1b5d4d14 100644 --- a/nb/HuggingFace Course-Llama3.1_(8B)-GRPO.ipynb +++ b/nb/HuggingFace Course-Llama3.1_(8B)-GRPO.ipynb @@ -9355,20 +9355,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", 
- " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/HuggingFace Course-Llama_FP8_GRPO.ipynb b/nb/HuggingFace Course-Llama_FP8_GRPO.ipynb index e29796e06..d9457b722 100644 --- a/nb/HuggingFace Course-Llama_FP8_GRPO.ipynb +++ b/nb/HuggingFace Course-Llama_FP8_GRPO.ipynb @@ -6547,20 +6547,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/HuggingFace 
Course-Mistral_v0.3_(7B)-GRPO.ipynb b/nb/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.ipynb index 17981e072..01a7e4716 100644 --- a/nb/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.ipynb +++ b/nb/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.ipynb @@ -8652,20 +8652,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb b/nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb index a66b19820..aafe2a886 100644 --- a/nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb +++ b/nb/HuggingFace Course-Phi_4_(14B)-GRPO.ipynb @@ -5324,20 +5324,20 @@ "outputs": [], "source": 
[ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb b/nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb index f19074af5..ddea4715e 100644 --- a/nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb +++ b/nb/HuggingFace Course-Qwen2.5_(3B)-GRPO.ipynb @@ -8234,20 +8234,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: 
model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/HuggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb b/nb/HuggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb index 10ba25861..e703294e8 100644 --- a/nb/HuggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb +++ b/nb/HuggingFace Course-Qwen2_5_7B_VL_GRPO.ipynb @@ -7008,20 +7008,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: 
model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/nb/HuggingFace Course-Qwen3_(4B)-GRPO.ipynb b/nb/HuggingFace Course-Qwen3_(4B)-GRPO.ipynb index 422e5dd0d..dbeb3d2b0 100644 --- a/nb/HuggingFace Course-Qwen3_(4B)-GRPO.ipynb +++ b/nb/HuggingFace Course-Qwen3_(4B)-GRPO.ipynb @@ -5627,20 +5627,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = 
\"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/HuggingFace Course-Qwen3_8B_FP8_GRPO.ipynb b/nb/HuggingFace Course-Qwen3_8B_FP8_GRPO.ipynb index 9789b34eb..50f2099d9 100644 --- a/nb/HuggingFace Course-Qwen3_8B_FP8_GRPO.ipynb +++ b/nb/HuggingFace Course-Qwen3_8B_FP8_GRPO.ipynb @@ -6604,20 +6604,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + 
" model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb b/nb/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb index 7d5c40f82..8a8539fc3 100644 --- a/nb/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb +++ b/nb/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.ipynb @@ -4856,20 +4856,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")" + " 
model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/nb/HuggingFace Course-gpt-oss-(20B)-GRPO.ipynb b/nb/HuggingFace Course-gpt-oss-(20B)-GRPO.ipynb index 280d48a5b..f1e4b44e4 100644 --- a/nb/HuggingFace Course-gpt-oss-(20B)-GRPO.ipynb +++ b/nb/HuggingFace Course-gpt-oss-(20B)-GRPO.ipynb @@ -5813,14 +5813,14 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", - "if False: model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", + "if False: model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/nb/HuggingFace Course-gpt-oss-(20B)_A100-GRPO.ipynb b/nb/HuggingFace Course-gpt-oss-(20B)_A100-GRPO.ipynb index bcb14a290..a24a6c057 100644 --- a/nb/HuggingFace Course-gpt-oss-(20B)_A100-GRPO.ipynb +++ b/nb/HuggingFace Course-gpt-oss-(20B)_A100-GRPO.ipynb @@ -1695,21 +1695,21 @@ "source": [ "# Merge to mxfp 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"gpt-oss-finetune\", tokenizer, save_method = \"mxfp4\",)\n", + " model.save_pretrained_merged(\"gpt-oss-finetune-mxfp4\", tokenizer, save_method = \"mxfp4\",)\n", 
"if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"mxfp4\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-mxfp4\", tokenizer, save_method = \"mxfp4\", token = \"\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"gpt-oss-finetune\")\n", - " tokenizer.save_pretrained(\"gpt-oss-finetune\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/gpt-oss-finetune\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/gpt-oss-finetune\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/nb/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.ipynb b/nb/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.ipynb index 471f3608d..f1686a678 100644 --- a/nb/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.ipynb +++ b/nb/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.ipynb @@ -6219,14 +6219,14 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", - "if False: model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", + "if False: model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " 
model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/nb/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb b/nb/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb index 4a7593ac2..5463684a4 100644 --- a/nb/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb +++ b/nb/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb @@ -9668,20 +9668,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " 
model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb b/nb/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb index cd6f8484c..d80f0efbe 100644 --- a/nb/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb +++ b/nb/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb @@ -12638,20 +12638,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git 
a/nb/Kaggle-CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.ipynb b/nb/Kaggle-CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.ipynb index 9d8a4b9de..6e9707c10 100644 --- a/nb/Kaggle-CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.ipynb +++ b/nb/Kaggle-CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.ipynb @@ -1473,20 +1473,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-CodeGemma_(7B)-Conversational.ipynb b/nb/Kaggle-CodeGemma_(7B)-Conversational.ipynb index e6bf9af8d..901163a7c 100644 --- a/nb/Kaggle-CodeGemma_(7B)-Conversational.ipynb +++ 
b/nb/Kaggle-CodeGemma_(7B)-Conversational.ipynb @@ -824,20 +824,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb b/nb/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb index 1e5c2afcf..d56a4c06f 100644 --- a/nb/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb +++ b/nb/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb @@ -5015,20 +5015,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: 
model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Deepseek_OCR_(3B)-Eval.ipynb b/nb/Kaggle-Deepseek_OCR_(3B)-Eval.ipynb index b9d8b43c1..ae177955e 100644 --- a/nb/Kaggle-Deepseek_OCR_(3B)-Eval.ipynb +++ b/nb/Kaggle-Deepseek_OCR_(3B)-Eval.ipynb @@ -1647,10 +1647,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Kaggle-Deepseek_OCR_(3B)-Evaluation.ipynb b/nb/Kaggle-Deepseek_OCR_(3B)-Evaluation.ipynb index 444f5fd1d..e3cddfd1d 100644 --- a/nb/Kaggle-Deepseek_OCR_(3B)-Evaluation.ipynb +++ b/nb/Kaggle-Deepseek_OCR_(3B)-Evaluation.ipynb @@ -1961,10 +1961,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Kaggle-Deepseek_OCR_(3B).ipynb b/nb/Kaggle-Deepseek_OCR_(3B).ipynb index b9d8b43c1..ae177955e 100644 --- a/nb/Kaggle-Deepseek_OCR_(3B).ipynb +++ b/nb/Kaggle-Deepseek_OCR_(3B).ipynb @@ -1647,10 +1647,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Kaggle-ERNIE_4_5_21B_A3B_PT-Conversational.ipynb b/nb/Kaggle-ERNIE_4_5_21B_A3B_PT-Conversational.ipynb index a218c0ffe..680f4fbc6 100644 --- a/nb/Kaggle-ERNIE_4_5_21B_A3B_PT-Conversational.ipynb +++ b/nb/Kaggle-ERNIE_4_5_21B_A3B_PT-Conversational.ipynb @@ -1692,23 +1692,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " 
tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-ERNIE_4_5_VL_28B_A3B_PT_Vision.ipynb b/nb/Kaggle-ERNIE_4_5_VL_28B_A3B_PT_Vision.ipynb index 068ae9b86..e4faa74bc 100644 --- a/nb/Kaggle-ERNIE_4_5_VL_28B_A3B_PT_Vision.ipynb +++ b/nb/Kaggle-ERNIE_4_5_VL_28B_A3B_PT_Vision.ipynb @@ -2089,10 +2089,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Kaggle-Falcon_H1_(0.5B)-Alpaca.ipynb b/nb/Kaggle-Falcon_H1_(0.5B)-Alpaca.ipynb index 502240060..d845dd528 100644 --- a/nb/Kaggle-Falcon_H1_(0.5B)-Alpaca.ipynb +++ b/nb/Kaggle-Falcon_H1_(0.5B)-Alpaca.ipynb @@ -989,20 +989,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - 
"if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Gemma2_(2B)-Alpaca.ipynb b/nb/Kaggle-Gemma2_(2B)-Alpaca.ipynb index 1a0c9ffdb..30c3a1e7a 100644 --- a/nb/Kaggle-Gemma2_(2B)-Alpaca.ipynb +++ b/nb/Kaggle-Gemma2_(2B)-Alpaca.ipynb @@ -1197,20 +1197,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = 
\"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Gemma2_(9B)-Alpaca.ipynb b/nb/Kaggle-Gemma2_(9B)-Alpaca.ipynb index 7f4a5d4d1..2c99b7251 100644 --- a/nb/Kaggle-Gemma2_(9B)-Alpaca.ipynb +++ b/nb/Kaggle-Gemma2_(9B)-Alpaca.ipynb @@ -1224,20 +1224,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = 
\"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Gemma3N_(4B)-Audio.ipynb b/nb/Kaggle-Gemma3N_(4B)-Audio.ipynb index 4d53136a8..275e82bbc 100644 --- a/nb/Kaggle-Gemma3N_(4B)-Audio.ipynb +++ b/nb/Kaggle-Gemma3N_(4B)-Audio.ipynb @@ -1566,7 +1566,7 @@ "outputs": [], "source": [ "if True: # Change to True to save finetune!\n", - " model.save_pretrained_merged(\"gemma-3n\", processor)" + " model.save_pretrained_merged(\"gemma-3n-merged\", processor)" ] }, { diff --git a/nb/Kaggle-Gemma3N_(4B)-Conversational.ipynb b/nb/Kaggle-Gemma3N_(4B)-Conversational.ipynb index 253324923..ae5814d2d 100644 --- a/nb/Kaggle-Gemma3N_(4B)-Conversational.ipynb +++ b/nb/Kaggle-Gemma3N_(4B)-Conversational.ipynb @@ -1963,7 +1963,7 @@ "outputs": [], "source": [ "if False: # Change to True to save finetune!\n", - " model.save_pretrained_merged(\"gemma-3N-finetune\", tokenizer)" + " model.save_pretrained_merged(\"gemma-3N-finetune-merged\", tokenizer)" ] }, { diff --git a/nb/Kaggle-Gemma3N_(4B)-Vision.ipynb b/nb/Kaggle-Gemma3N_(4B)-Vision.ipynb index efde0161b..84df2b927 100644 --- a/nb/Kaggle-Gemma3N_(4B)-Vision.ipynb +++ b/nb/Kaggle-Gemma3N_(4B)-Vision.ipynb @@ -1481,10 +1481,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", processor,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", processor,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", processor, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", processor, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Kaggle-Gemma3_(1B)-GRPO.ipynb b/nb/Kaggle-Gemma3_(1B)-GRPO.ipynb index 5d147a9b5..f25ca087f 100644 --- a/nb/Kaggle-Gemma3_(1B)-GRPO.ipynb +++ b/nb/Kaggle-Gemma3_(1B)-GRPO.ipynb @@ -1850,7 +1850,7 @@ "outputs": [], "source": [ "if False: # Change to True to save finetune!\n", - " model.save_pretrained_merged(\"gemma-3-finetune\", tokenizer)" + " model.save_pretrained_merged(\"gemma-3-finetune-merged\", tokenizer)" ] }, { diff --git a/nb/Kaggle-Gemma3_(270M).ipynb b/nb/Kaggle-Gemma3_(270M).ipynb index 4a51a2c7b..453cfe693 100644 --- a/nb/Kaggle-Gemma3_(270M).ipynb +++ b/nb/Kaggle-Gemma3_(270M).ipynb @@ -1667,23 +1667,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"gemma-3-finetune\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"gemma-3-finetune-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gemma-3-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gemma-3-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"gemma-3-finetune\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"gemma-3-finetune-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " 
model.push_to_hub_merged(\"hf/gemma-3-finetune\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gemma-3-finetune-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"gemma-3-finetune\")\n", - " tokenizer.save_pretrained(\"gemma-3-finetune\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/gemma-3-finetune\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/gemma-3-finetune\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Gemma3_(27B)_A100-Conversational.ipynb b/nb/Kaggle-Gemma3_(27B)_A100-Conversational.ipynb index 8c3bb8107..377984663 100644 --- a/nb/Kaggle-Gemma3_(27B)_A100-Conversational.ipynb +++ b/nb/Kaggle-Gemma3_(27B)_A100-Conversational.ipynb @@ -1008,7 +1008,7 @@ "outputs": [], "source": [ "if False: # Change to True to save finetune!\n", - " model.save_pretrained_merged(\"gemma-3-finetune\", tokenizer)" + " model.save_pretrained_merged(\"gemma-3-finetune-merged\", tokenizer)" ] }, { diff --git a/nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb b/nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb index 49641f55f..291425417 100644 --- a/nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb +++ b/nb/Kaggle-Gemma3_(4B)-Vision-GRPO.ipynb @@ -1942,10 +1942,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Kaggle-Gemma3_(4B)-Vision.ipynb b/nb/Kaggle-Gemma3_(4B)-Vision.ipynb index 97e1a379b..b0df28209 100644 --- a/nb/Kaggle-Gemma3_(4B)-Vision.ipynb +++ b/nb/Kaggle-Gemma3_(4B)-Vision.ipynb @@ -1450,10 +1450,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", processor,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", processor,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", processor, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", processor, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Kaggle-Gemma3_(4B).ipynb b/nb/Kaggle-Gemma3_(4B).ipynb index 17ef34845..dcb7b1941 100644 --- a/nb/Kaggle-Gemma3_(4B).ipynb +++ b/nb/Kaggle-Gemma3_(4B).ipynb @@ -1236,7 +1236,7 @@ "outputs": [], "source": [ "if False: # Change to True to save finetune!\n", - " model.save_pretrained_merged(\"gemma-3-finetune\", tokenizer)" + " model.save_pretrained_merged(\"gemma-3-finetune-merged\", tokenizer)" ] }, { diff --git a/nb/Kaggle-Granite4.0.ipynb b/nb/Kaggle-Granite4.0.ipynb index e412faa82..fcddef967 100644 --- a/nb/Kaggle-Granite4.0.ipynb +++ b/nb/Kaggle-Granite4.0.ipynb @@ -1439,23 +1439,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, 
save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Granite4.0_350M.ipynb b/nb/Kaggle-Granite4.0_350M.ipynb index aa35a754a..2d5dc9c1c 100644 --- a/nb/Kaggle-Granite4.0_350M.ipynb +++ b/nb/Kaggle-Granite4.0_350M.ipynb @@ -1606,23 +1606,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " 
model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Liquid_LFM2_(1.2B)-Conversational.ipynb b/nb/Kaggle-Liquid_LFM2_(1.2B)-Conversational.ipynb index 81ebdcef9..60e80af3d 100644 --- a/nb/Kaggle-Liquid_LFM2_(1.2B)-Conversational.ipynb +++ b/nb/Kaggle-Liquid_LFM2_(1.2B)-Conversational.ipynb @@ -1539,20 +1539,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = 
\"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/nb/Kaggle-Llama3.1_(8B)-Alpaca.ipynb b/nb/Kaggle-Llama3.1_(8B)-Alpaca.ipynb index a6047e059..13b70c0a0 100644 --- a/nb/Kaggle-Llama3.1_(8B)-Alpaca.ipynb +++ b/nb/Kaggle-Llama3.1_(8B)-Alpaca.ipynb @@ -1165,20 +1165,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, 
save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb b/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb index d01e2b491..1e587e7c4 100644 --- a/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb +++ b/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb @@ -9346,20 +9346,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " 
model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Llama3.2_(11B)-Vision.ipynb b/nb/Kaggle-Llama3.2_(11B)-Vision.ipynb index 2c9abef0c..2055045d2 100644 --- a/nb/Kaggle-Llama3.2_(11B)-Vision.ipynb +++ b/nb/Kaggle-Llama3.2_(11B)-Vision.ipynb @@ -1354,10 +1354,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Kaggle-Llama3.2_(1B)-RAFT.ipynb b/nb/Kaggle-Llama3.2_(1B)-RAFT.ipynb index 50e1ae0d1..5082165b5 100644 --- a/nb/Kaggle-Llama3.2_(1B)-RAFT.ipynb +++ b/nb/Kaggle-Llama3.2_(1B)-RAFT.ipynb @@ -1355,20 +1355,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: 
model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Llama3.2_(1B_and_3B)-Conversational.ipynb b/nb/Kaggle-Llama3.2_(1B_and_3B)-Conversational.ipynb index 892a45272..0312c7231 100644 --- a/nb/Kaggle-Llama3.2_(1B_and_3B)-Conversational.ipynb +++ b/nb/Kaggle-Llama3.2_(1B_and_3B)-Conversational.ipynb @@ -1303,20 +1303,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", 
"if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Llama3.3_(70B)_A100-Conversational.ipynb b/nb/Kaggle-Llama3.3_(70B)_A100-Conversational.ipynb index 97f32073e..88a3645c5 100644 --- a/nb/Kaggle-Llama3.3_(70B)_A100-Conversational.ipynb +++ b/nb/Kaggle-Llama3.3_(70B)_A100-Conversational.ipynb @@ -1699,20 +1699,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = 
\"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Llama3_(8B)-Alpaca.ipynb b/nb/Kaggle-Llama3_(8B)-Alpaca.ipynb index debd718d6..a66cb2add 100644 --- a/nb/Kaggle-Llama3_(8B)-Alpaca.ipynb +++ b/nb/Kaggle-Llama3_(8B)-Alpaca.ipynb @@ -1218,20 +1218,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Llama3_(8B)-Conversational.ipynb b/nb/Kaggle-Llama3_(8B)-Conversational.ipynb index 0bf35c9c9..e91cea1b6 
100644 --- a/nb/Kaggle-Llama3_(8B)-Conversational.ipynb +++ b/nb/Kaggle-Llama3_(8B)-Conversational.ipynb @@ -1318,20 +1318,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Llama3_(8B)-ORPO.ipynb b/nb/Kaggle-Llama3_(8B)-ORPO.ipynb index dd7976fd6..91f8e34e6 100644 --- a/nb/Kaggle-Llama3_(8B)-ORPO.ipynb +++ b/nb/Kaggle-Llama3_(8B)-ORPO.ipynb @@ -1383,20 +1383,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", 
tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Llama_FP8_GRPO.ipynb b/nb/Kaggle-Llama_FP8_GRPO.ipynb index ef64934f3..0f369fb91 100644 --- a/nb/Kaggle-Llama_FP8_GRPO.ipynb +++ b/nb/Kaggle-Llama_FP8_GRPO.ipynb @@ -6538,20 +6538,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: 
model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Llasa_TTS_(1B).ipynb b/nb/Kaggle-Llasa_TTS_(1B).ipynb index 5dd77959f..988fb1357 100644 --- a/nb/Kaggle-Llasa_TTS_(1B).ipynb +++ b/nb/Kaggle-Llasa_TTS_(1B).ipynb @@ -1535,20 +1535,20 @@ ], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: 
model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Llasa_TTS_(3B).ipynb b/nb/Kaggle-Llasa_TTS_(3B).ipynb index b1eaf56b4..02563c29d 100644 --- a/nb/Kaggle-Llasa_TTS_(3B).ipynb +++ b/nb/Kaggle-Llasa_TTS_(3B).ipynb @@ -1527,20 +1527,20 @@ ], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if 
False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Magistral_(24B)-Reasoning-Conversational.ipynb b/nb/Kaggle-Magistral_(24B)-Reasoning-Conversational.ipynb index 3da045498..5df0fd252 100644 --- a/nb/Kaggle-Magistral_(24B)-Reasoning-Conversational.ipynb +++ b/nb/Kaggle-Magistral_(24B)-Reasoning-Conversational.ipynb @@ -1167,23 +1167,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " 
tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Meta-Synthetic-Data-Llama3.1_(8B).ipynb b/nb/Kaggle-Meta-Synthetic-Data-Llama3.1_(8B).ipynb index 1555c7984..33578bbce 100644 --- a/nb/Kaggle-Meta-Synthetic-Data-Llama3.1_(8B).ipynb +++ b/nb/Kaggle-Meta-Synthetic-Data-Llama3.1_(8B).ipynb @@ -1733,20 +1733,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Meta_Synthetic_Data_Llama3_2_(3B).ipynb b/nb/Kaggle-Meta_Synthetic_Data_Llama3_2_(3B).ipynb index f5488c27f..f31bf2be2 100644 --- 
a/nb/Kaggle-Meta_Synthetic_Data_Llama3_2_(3B).ipynb +++ b/nb/Kaggle-Meta_Synthetic_Data_Llama3_2_(3B).ipynb @@ -1660,23 +1660,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Change to True to upload finetune\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Change to True to upload finetune\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Change to True to upload finetune\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.ipynb b/nb/Kaggle-Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.ipynb index 9289bf80e..6f18469fb 100644 --- a/nb/Kaggle-Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.ipynb +++ b/nb/Kaggle-Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.ipynb @@ 
-10932,20 +10932,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Ministral_3_VL_(3B)_Vision.ipynb b/nb/Kaggle-Ministral_3_VL_(3B)_Vision.ipynb index 898184777..72f7f621b 100644 --- a/nb/Kaggle-Ministral_3_VL_(3B)_Vision.ipynb +++ b/nb/Kaggle-Ministral_3_VL_(3B)_Vision.ipynb @@ -1428,10 +1428,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Kaggle-Mistral_Nemo_(12B)-Alpaca.ipynb b/nb/Kaggle-Mistral_Nemo_(12B)-Alpaca.ipynb index ba5210b7c..3e91b9e78 100644 --- a/nb/Kaggle-Mistral_Nemo_(12B)-Alpaca.ipynb +++ b/nb/Kaggle-Mistral_Nemo_(12B)-Alpaca.ipynb @@ -1331,20 +1331,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " 
model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Mistral_Small_(22B)-Alpaca.ipynb b/nb/Kaggle-Mistral_Small_(22B)-Alpaca.ipynb index 00743666a..cd67fc548 100644 --- a/nb/Kaggle-Mistral_Small_(22B)-Alpaca.ipynb +++ b/nb/Kaggle-Mistral_Small_(22B)-Alpaca.ipynb @@ -1317,20 +1317,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Mistral_v0.3_(7B)-Alpaca.ipynb 
b/nb/Kaggle-Mistral_v0.3_(7B)-Alpaca.ipynb index 3f6a7ba2d..f5d566554 100644 --- a/nb/Kaggle-Mistral_v0.3_(7B)-Alpaca.ipynb +++ b/nb/Kaggle-Mistral_v0.3_(7B)-Alpaca.ipynb @@ -1254,20 +1254,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Mistral_v0.3_(7B)-CPT.ipynb b/nb/Kaggle-Mistral_v0.3_(7B)-CPT.ipynb index 316a3b3ea..799d46041 100644 --- a/nb/Kaggle-Mistral_v0.3_(7B)-CPT.ipynb +++ b/nb/Kaggle-Mistral_v0.3_(7B)-CPT.ipynb @@ -719,20 +719,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, 
save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Mistral_v0.3_(7B)-Conversational.ipynb b/nb/Kaggle-Mistral_v0.3_(7B)-Conversational.ipynb index 2c7bccb02..59044aac2 100644 --- a/nb/Kaggle-Mistral_v0.3_(7B)-Conversational.ipynb +++ b/nb/Kaggle-Mistral_v0.3_(7B)-Conversational.ipynb @@ -1377,20 +1377,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if 
False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb b/nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb index 4d4068859..ce2c98ea0 100644 --- a/nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb +++ b/nb/Kaggle-Mistral_v0.3_(7B)-GRPO.ipynb @@ -8643,20 +8643,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, 
save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Nemotron-3-Nano-30B-A3B_A100.ipynb b/nb/Kaggle-Nemotron-3-Nano-30B-A3B_A100.ipynb index a154d60ea..f361f3b55 100644 --- a/nb/Kaggle-Nemotron-3-Nano-30B-A3B_A100.ipynb +++ b/nb/Kaggle-Nemotron-3-Nano-30B-A3B_A100.ipynb @@ -1774,23 +1774,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token 
= \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Nemotron-Nano-3-30B-A3B_A100.ipynb b/nb/Kaggle-Nemotron-Nano-3-30B-A3B_A100.ipynb index a154d60ea..f361f3b55 100644 --- a/nb/Kaggle-Nemotron-Nano-3-30B-A3B_A100.ipynb +++ b/nb/Kaggle-Nemotron-Nano-3-30B-A3B_A100.ipynb @@ -1774,23 +1774,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: 
# Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Orpheus_(3B)-TTS.ipynb b/nb/Kaggle-Orpheus_(3B)-TTS.ipynb index 5499511a0..7c367cf9c 100644 --- a/nb/Kaggle-Orpheus_(3B)-TTS.ipynb +++ b/nb/Kaggle-Orpheus_(3B)-TTS.ipynb @@ -2158,20 +2158,20 @@ ], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Oute_TTS_(1B).ipynb b/nb/Kaggle-Oute_TTS_(1B).ipynb index 
f687c5f9c..f464f2fa5 100644 --- a/nb/Kaggle-Oute_TTS_(1B).ipynb +++ b/nb/Kaggle-Oute_TTS_(1B).ipynb @@ -4563,20 +4563,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Paddle_OCR_(1B)_Vision.ipynb b/nb/Kaggle-Paddle_OCR_(1B)_Vision.ipynb index da04afdc1..6ce11a217 100644 --- a/nb/Kaggle-Paddle_OCR_(1B)_Vision.ipynb +++ b/nb/Kaggle-Paddle_OCR_(1B)_Vision.ipynb @@ -1644,10 +1644,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Kaggle-Phi_3.5_Mini-Conversational.ipynb b/nb/Kaggle-Phi_3.5_Mini-Conversational.ipynb index c449f0d47..e19d06b12 100644 --- a/nb/Kaggle-Phi_3.5_Mini-Conversational.ipynb +++ b/nb/Kaggle-Phi_3.5_Mini-Conversational.ipynb @@ -1327,20 +1327,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " 
model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Phi_3_Medium-Conversational.ipynb b/nb/Kaggle-Phi_3_Medium-Conversational.ipynb index 4a20f5458..2ad5c2e17 100644 --- a/nb/Kaggle-Phi_3_Medium-Conversational.ipynb +++ b/nb/Kaggle-Phi_3_Medium-Conversational.ipynb @@ -1449,20 +1449,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Phi_4-Conversational.ipynb 
b/nb/Kaggle-Phi_4-Conversational.ipynb index 127748492..a380016a2 100644 --- a/nb/Kaggle-Phi_4-Conversational.ipynb +++ b/nb/Kaggle-Phi_4-Conversational.ipynb @@ -1500,20 +1500,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb b/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb index 50e54fa08..ec77a86e0 100644 --- a/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb +++ b/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb @@ -5315,20 +5315,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", 
- "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Pixtral_(12B)-Vision.ipynb b/nb/Kaggle-Pixtral_(12B)-Vision.ipynb index 11b86fd22..8e467124f 100644 --- a/nb/Kaggle-Pixtral_(12B)-Vision.ipynb +++ b/nb/Kaggle-Pixtral_(12B)-Vision.ipynb @@ -1238,10 +1238,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb b/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb index cfa4a0025..16495ab4a 100644 --- a/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb +++ b/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb @@ -8225,20 +8225,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = 
\"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Qwen2.5_(7B)-Alpaca.ipynb b/nb/Kaggle-Qwen2.5_(7B)-Alpaca.ipynb index 6dab40c70..749d40d99 100644 --- a/nb/Kaggle-Qwen2.5_(7B)-Alpaca.ipynb +++ b/nb/Kaggle-Qwen2.5_(7B)-Alpaca.ipynb @@ -1247,20 +1247,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Qwen2.5_Coder_(14B)-Conversational.ipynb b/nb/Kaggle-Qwen2.5_Coder_(14B)-Conversational.ipynb index 
8c40af37c..c74bda5f3 100644 --- a/nb/Kaggle-Qwen2.5_Coder_(14B)-Conversational.ipynb +++ b/nb/Kaggle-Qwen2.5_Coder_(14B)-Conversational.ipynb @@ -1537,20 +1537,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Qwen2.5_VL_(7B)-Vision.ipynb b/nb/Kaggle-Qwen2.5_VL_(7B)-Vision.ipynb index e89ca379b..96386e3b7 100644 --- a/nb/Kaggle-Qwen2.5_VL_(7B)-Vision.ipynb +++ b/nb/Kaggle-Qwen2.5_VL_(7B)-Vision.ipynb @@ -1377,10 +1377,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Kaggle-Qwen2_(7B)-Alpaca.ipynb b/nb/Kaggle-Qwen2_(7B)-Alpaca.ipynb index c61723857..1196d0f22 100644 --- a/nb/Kaggle-Qwen2_(7B)-Alpaca.ipynb +++ b/nb/Kaggle-Qwen2_(7B)-Alpaca.ipynb @@ -1312,20 +1312,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = 
\"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb b/nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb index c923f7ea6..43cef289a 100644 --- a/nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb +++ b/nb/Kaggle-Qwen2_5_7B_VL_GRPO.ipynb @@ -6999,20 +6999,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/nb/Kaggle-Qwen2_VL_(7B)-Vision.ipynb b/nb/Kaggle-Qwen2_VL_(7B)-Vision.ipynb index a971ebab0..96220922e 100644 --- 
a/nb/Kaggle-Qwen2_VL_(7B)-Vision.ipynb +++ b/nb/Kaggle-Qwen2_VL_(7B)-Vision.ipynb @@ -1366,10 +1366,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Kaggle-Qwen3_(14B)-Alpaca.ipynb b/nb/Kaggle-Qwen3_(14B)-Alpaca.ipynb index d3bf485b1..af6c23cb2 100644 --- a/nb/Kaggle-Qwen3_(14B)-Alpaca.ipynb +++ b/nb/Kaggle-Qwen3_(14B)-Alpaca.ipynb @@ -1413,20 +1413,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " 
model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Qwen3_(14B)-Reasoning-Conversational.ipynb b/nb/Kaggle-Qwen3_(14B)-Reasoning-Conversational.ipynb index 671f3557a..d6b7755a7 100644 --- a/nb/Kaggle-Qwen3_(14B)-Reasoning-Conversational.ipynb +++ b/nb/Kaggle-Qwen3_(14B)-Reasoning-Conversational.ipynb @@ -1647,23 +1647,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + 
" model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Qwen3_(14B).ipynb b/nb/Kaggle-Qwen3_(14B).ipynb index 77e565bad..305b59682 100644 --- a/nb/Kaggle-Qwen3_(14B).ipynb +++ b/nb/Kaggle-Qwen3_(14B).ipynb @@ -1444,23 +1444,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Qwen3_(32B)_A100-Reasoning-Conversational.ipynb b/nb/Kaggle-Qwen3_(32B)_A100-Reasoning-Conversational.ipynb index ac32f15b3..328c82e64 100644 --- 
a/nb/Kaggle-Qwen3_(32B)_A100-Reasoning-Conversational.ipynb +++ b/nb/Kaggle-Qwen3_(32B)_A100-Reasoning-Conversational.ipynb @@ -1724,23 +1724,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Qwen3_(4B)-GRPO.ipynb b/nb/Kaggle-Qwen3_(4B)-GRPO.ipynb index c2ece782b..85aa09f5e 100644 --- a/nb/Kaggle-Qwen3_(4B)-GRPO.ipynb +++ b/nb/Kaggle-Qwen3_(4B)-GRPO.ipynb @@ -5618,20 +5618,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if 
False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Qwen3_(4B)-Instruct.ipynb b/nb/Kaggle-Qwen3_(4B)-Instruct.ipynb index 89bd58442..f23737894 100644 --- a/nb/Kaggle-Qwen3_(4B)-Instruct.ipynb +++ b/nb/Kaggle-Qwen3_(4B)-Instruct.ipynb @@ -1502,23 +1502,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token 
= \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Qwen3_(4B)-Thinking.ipynb b/nb/Kaggle-Qwen3_(4B)-Thinking.ipynb index ad70f7462..ea8fa542e 100644 --- a/nb/Kaggle-Qwen3_(4B)-Thinking.ipynb +++ b/nb/Kaggle-Qwen3_(4B)-Thinking.ipynb @@ -1518,23 +1518,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " 
model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Qwen3_8B_FP8_GRPO.ipynb b/nb/Kaggle-Qwen3_8B_FP8_GRPO.ipynb index 5ed9fa25c..50417f39d 100644 --- a/nb/Kaggle-Qwen3_8B_FP8_GRPO.ipynb +++ b/nb/Kaggle-Qwen3_8B_FP8_GRPO.ipynb @@ -6595,20 +6595,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " 
tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb b/nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb index 3e406b6db..8d3d52af1 100644 --- a/nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb +++ b/nb/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.ipynb @@ -4847,20 +4847,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")" + " 
model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/nb/Kaggle-Qwen3_VL_(8B)-Vision.ipynb b/nb/Kaggle-Qwen3_VL_(8B)-Vision.ipynb index b6f6f297c..36ace323e 100644 --- a/nb/Kaggle-Qwen3_VL_(8B)-Vision.ipynb +++ b/nb/Kaggle-Qwen3_VL_(8B)-Vision.ipynb @@ -1075,10 +1075,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Kaggle-Sesame_CSM_(1B)-TTS.ipynb b/nb/Kaggle-Sesame_CSM_(1B)-TTS.ipynb index 0b899dffb..d85af7fc0 100644 --- a/nb/Kaggle-Sesame_CSM_(1B)-TTS.ipynb +++ b/nb/Kaggle-Sesame_CSM_(1B)-TTS.ipynb @@ -918,19 +918,19 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", processor, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", processor, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", processor, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", processor, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", processor, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", processor, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", processor, save_method = \"merged_4bit\",)\n", + "if False: 
model.push_to_hub_merged(\"hf/model-merged-4bit\", processor, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " processor.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " processor.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " processor.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " processor.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, diff --git a/nb/Kaggle-Spark_TTS_(0_5B).ipynb b/nb/Kaggle-Spark_TTS_(0_5B).ipynb index 78e8ab288..eca92a6ad 100644 --- a/nb/Kaggle-Spark_TTS_(0_5B).ipynb +++ b/nb/Kaggle-Spark_TTS_(0_5B).ipynb @@ -1518,20 +1518,20 @@ ], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = 
\"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-TinyLlama_(1.1B)-Alpaca.ipynb b/nb/Kaggle-TinyLlama_(1.1B)-Alpaca.ipynb index 65502da0a..334c58c67 100644 --- a/nb/Kaggle-TinyLlama_(1.1B)-Alpaca.ipynb +++ b/nb/Kaggle-TinyLlama_(1.1B)-Alpaca.ipynb @@ -2460,20 +2460,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-Whisper.ipynb b/nb/Kaggle-Whisper.ipynb index 715f20660..7702e2888 100644 --- a/nb/Kaggle-Whisper.ipynb +++ b/nb/Kaggle-Whisper.ipynb @@ -1090,20 
+1090,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = None,)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = None,)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Kaggle-gpt-oss-(120B)_A100-Fine-tuning.ipynb b/nb/Kaggle-gpt-oss-(120B)_A100-Fine-tuning.ipynb index 748f6f239..601c136b3 100644 --- a/nb/Kaggle-gpt-oss-(120B)_A100-Fine-tuning.ipynb +++ b/nb/Kaggle-gpt-oss-(120B)_A100-Fine-tuning.ipynb @@ -1693,21 +1693,21 @@ "source": [ "# Merge to mxfp 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"gpt-oss-finetune\", tokenizer, save_method = \"mxfp4\",)\n", + " model.save_pretrained_merged(\"gpt-oss-finetune-mxfp4\", tokenizer, save_method = \"mxfp4\",)\n", "if False: # Pushing to HF Hub\n", 
- " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"mxfp4\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-mxfp4\", tokenizer, save_method = \"mxfp4\", token = \"\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"gpt-oss-finetune\")\n", - " tokenizer.save_pretrained(\"gpt-oss-finetune\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/gpt-oss-finetune\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/gpt-oss-finetune\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/nb/Kaggle-gpt-oss-(20B)-Fine-tuning.ipynb b/nb/Kaggle-gpt-oss-(20B)-Fine-tuning.ipynb index c22bee081..95e82f703 100644 --- a/nb/Kaggle-gpt-oss-(20B)-Fine-tuning.ipynb +++ b/nb/Kaggle-gpt-oss-(20B)-Fine-tuning.ipynb @@ -1422,14 +1422,14 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", - "if False: model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", + "if False: model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = 
\"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/nb/Kaggle-gpt-oss-(20B)-GRPO.ipynb b/nb/Kaggle-gpt-oss-(20B)-GRPO.ipynb index 280d48a5b..f1e4b44e4 100644 --- a/nb/Kaggle-gpt-oss-(20B)-GRPO.ipynb +++ b/nb/Kaggle-gpt-oss-(20B)-GRPO.ipynb @@ -5813,14 +5813,14 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", - "if False: model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", + "if False: model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/nb/Kaggle-gpt-oss-(20B)_A100-GRPO.ipynb b/nb/Kaggle-gpt-oss-(20B)_A100-GRPO.ipynb index fe7772ccd..edd3bb39d 100644 --- a/nb/Kaggle-gpt-oss-(20B)_A100-GRPO.ipynb +++ b/nb/Kaggle-gpt-oss-(20B)_A100-GRPO.ipynb @@ -1693,21 +1693,21 @@ "source": [ "# Merge to mxfp 4bit\n", "if False:\n", - " 
model.save_pretrained_merged(\"gpt-oss-finetune\", tokenizer, save_method = \"mxfp4\",)\n", + " model.save_pretrained_merged(\"gpt-oss-finetune-mxfp4\", tokenizer, save_method = \"mxfp4\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"mxfp4\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-mxfp4\", tokenizer, save_method = \"mxfp4\", token = \"\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"gpt-oss-finetune\")\n", - " tokenizer.save_pretrained(\"gpt-oss-finetune\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/gpt-oss-finetune\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/gpt-oss-finetune\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/nb/Kaggle-gpt_oss_(20B)_500K_Context_Fine_tuning.ipynb b/nb/Kaggle-gpt_oss_(20B)_500K_Context_Fine_tuning.ipynb index 018a28f90..779972f5c 100644 --- a/nb/Kaggle-gpt_oss_(20B)_500K_Context_Fine_tuning.ipynb +++ b/nb/Kaggle-gpt_oss_(20B)_500K_Context_Fine_tuning.ipynb @@ -879,14 +879,14 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", - "if False: model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = 
\"mxfp4\")\n", + "if False: model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/nb/Kaggle-gpt_oss_(20B)_GRPO_BF16.ipynb b/nb/Kaggle-gpt_oss_(20B)_GRPO_BF16.ipynb index a5f7ddb83..1f2391495 100644 --- a/nb/Kaggle-gpt_oss_(20B)_GRPO_BF16.ipynb +++ b/nb/Kaggle-gpt_oss_(20B)_GRPO_BF16.ipynb @@ -6217,14 +6217,14 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", - "if False: model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", + "if False: model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git 
a/nb/Liquid_LFM2-Conversational.ipynb b/nb/Liquid_LFM2-Conversational.ipynb index fc2b46ff2..62d14c41a 100644 --- a/nb/Liquid_LFM2-Conversational.ipynb +++ b/nb/Liquid_LFM2-Conversational.ipynb @@ -1544,20 +1544,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/nb/Liquid_LFM2_(1.2B)-Conversational.ipynb b/nb/Liquid_LFM2_(1.2B)-Conversational.ipynb index 104efc0c8..b1790ea02 100644 --- a/nb/Liquid_LFM2_(1.2B)-Conversational.ipynb +++ b/nb/Liquid_LFM2_(1.2B)-Conversational.ipynb @@ -1539,20 +1539,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: 
model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/nb/Llama3.1_(8B)-Alpaca.ipynb b/nb/Llama3.1_(8B)-Alpaca.ipynb index 935649b72..a63354f58 100644 --- a/nb/Llama3.1_(8B)-Alpaca.ipynb +++ b/nb/Llama3.1_(8B)-Alpaca.ipynb @@ -1165,20 +1165,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: 
model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Llama3.1_(8B)-GRPO.ipynb b/nb/Llama3.1_(8B)-GRPO.ipynb index d77fe5830..306ad5b91 100644 --- a/nb/Llama3.1_(8B)-GRPO.ipynb +++ b/nb/Llama3.1_(8B)-GRPO.ipynb @@ -9353,20 +9353,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if 
False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Llama3.2_(11B)-Vision.ipynb b/nb/Llama3.2_(11B)-Vision.ipynb index a44f0eab2..662191149 100644 --- a/nb/Llama3.2_(11B)-Vision.ipynb +++ b/nb/Llama3.2_(11B)-Vision.ipynb @@ -1354,10 +1354,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Llama3.2_(1B)-RAFT.ipynb b/nb/Llama3.2_(1B)-RAFT.ipynb index 0d56baefd..1219f40b8 100644 --- a/nb/Llama3.2_(1B)-RAFT.ipynb +++ b/nb/Llama3.2_(1B)-RAFT.ipynb @@ -1355,20 +1355,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, 
save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Llama3.2_(1B_and_3B)-Conversational.ipynb b/nb/Llama3.2_(1B_and_3B)-Conversational.ipynb index 0262136f1..176b580c2 100644 --- a/nb/Llama3.2_(1B_and_3B)-Conversational.ipynb +++ b/nb/Llama3.2_(1B_and_3B)-Conversational.ipynb @@ -1303,20 +1303,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - 
"if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Llama3.3_(70B)_A100-Conversational.ipynb b/nb/Llama3.3_(70B)_A100-Conversational.ipynb index 8bd6a382f..7aa6ac173 100644 --- a/nb/Llama3.3_(70B)_A100-Conversational.ipynb +++ b/nb/Llama3.3_(70B)_A100-Conversational.ipynb @@ -1699,20 +1699,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", 
tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Llama3_(8B)-Alpaca.ipynb b/nb/Llama3_(8B)-Alpaca.ipynb index 5286eb923..3f0654cca 100644 --- a/nb/Llama3_(8B)-Alpaca.ipynb +++ b/nb/Llama3_(8B)-Alpaca.ipynb @@ -1218,20 +1218,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = 
\"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Llama3_(8B)-Conversational.ipynb b/nb/Llama3_(8B)-Conversational.ipynb index 299ffc493..ebc172836 100644 --- a/nb/Llama3_(8B)-Conversational.ipynb +++ b/nb/Llama3_(8B)-Conversational.ipynb @@ -1318,20 +1318,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Llama3_(8B)-ORPO.ipynb b/nb/Llama3_(8B)-ORPO.ipynb index 2c9569a38..140029785 100644 --- 
a/nb/Llama3_(8B)-ORPO.ipynb +++ b/nb/Llama3_(8B)-ORPO.ipynb @@ -1383,20 +1383,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Llama_FP8_GRPO.ipynb b/nb/Llama_FP8_GRPO.ipynb index db5a1c14e..7651aa9bd 100644 --- a/nb/Llama_FP8_GRPO.ipynb +++ b/nb/Llama_FP8_GRPO.ipynb @@ -6595,20 +6595,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: 
model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Llasa_TTS_(1B).ipynb b/nb/Llasa_TTS_(1B).ipynb index fc470fd36..89a99fa24 100644 --- a/nb/Llasa_TTS_(1B).ipynb +++ b/nb/Llasa_TTS_(1B).ipynb @@ -1535,20 +1535,20 @@ ], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: 
model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Llasa_TTS_(3B).ipynb b/nb/Llasa_TTS_(3B).ipynb index c5be3c8ec..753803cdb 100644 --- a/nb/Llasa_TTS_(3B).ipynb +++ b/nb/Llasa_TTS_(3B).ipynb @@ -1527,20 +1527,20 @@ ], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", 
- " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Magistral_(24B)-Reasoning-Conversational.ipynb b/nb/Magistral_(24B)-Reasoning-Conversational.ipynb index d41fca992..bf2364de8 100644 --- a/nb/Magistral_(24B)-Reasoning-Conversational.ipynb +++ b/nb/Magistral_(24B)-Reasoning-Conversational.ipynb @@ -1167,23 +1167,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", 
token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Meta-Synthetic-Data-Llama3.1_(8B).ipynb b/nb/Meta-Synthetic-Data-Llama3.1_(8B).ipynb index 0ea4fba04..5e415f30a 100644 --- a/nb/Meta-Synthetic-Data-Llama3.1_(8B).ipynb +++ b/nb/Meta-Synthetic-Data-Llama3.1_(8B).ipynb @@ -1747,20 +1747,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Meta_Synthetic_Data_Llama3_2_(3B).ipynb 
b/nb/Meta_Synthetic_Data_Llama3_2_(3B).ipynb index 0b6f03241..cfe767ff5 100644 --- a/nb/Meta_Synthetic_Data_Llama3_2_(3B).ipynb +++ b/nb/Meta_Synthetic_Data_Llama3_2_(3B).ipynb @@ -1674,23 +1674,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Change to True to upload finetune\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Change to True to upload finetune\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Change to True to upload finetune\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.ipynb b/nb/Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.ipynb index 6302d84f0..348ad314b 100644 --- a/nb/Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.ipynb +++ 
b/nb/Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.ipynb @@ -10944,20 +10944,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Ministral_3_VL_(3B)_Vision.ipynb b/nb/Ministral_3_VL_(3B)_Vision.ipynb index c9cd6b859..05cd486d5 100644 --- a/nb/Ministral_3_VL_(3B)_Vision.ipynb +++ b/nb/Ministral_3_VL_(3B)_Vision.ipynb @@ -1461,10 +1461,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Mistral_Nemo_(12B)-Alpaca.ipynb b/nb/Mistral_Nemo_(12B)-Alpaca.ipynb index 6dd54d701..50355a444 100644 --- a/nb/Mistral_Nemo_(12B)-Alpaca.ipynb +++ b/nb/Mistral_Nemo_(12B)-Alpaca.ipynb @@ -1331,20 +1331,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = 
\"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Mistral_Small_(22B)-Alpaca.ipynb b/nb/Mistral_Small_(22B)-Alpaca.ipynb index fe4344d03..2ed9b1a93 100644 --- a/nb/Mistral_Small_(22B)-Alpaca.ipynb +++ b/nb/Mistral_Small_(22B)-Alpaca.ipynb @@ -1317,20 +1317,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Mistral_v0.3_(7B)-Alpaca.ipynb b/nb/Mistral_v0.3_(7B)-Alpaca.ipynb index 5bc3494c1..dca999d17 100644 --- 
a/nb/Mistral_v0.3_(7B)-Alpaca.ipynb +++ b/nb/Mistral_v0.3_(7B)-Alpaca.ipynb @@ -1254,20 +1254,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Mistral_v0.3_(7B)-CPT.ipynb b/nb/Mistral_v0.3_(7B)-CPT.ipynb index 08c5ecc4b..958255b7a 100644 --- a/nb/Mistral_v0.3_(7B)-CPT.ipynb +++ b/nb/Mistral_v0.3_(7B)-CPT.ipynb @@ -719,20 +719,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = 
\"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Mistral_v0.3_(7B)-Conversational.ipynb b/nb/Mistral_v0.3_(7B)-Conversational.ipynb index bd5423f43..c52fd47ff 100644 --- a/nb/Mistral_v0.3_(7B)-Conversational.ipynb +++ b/nb/Mistral_v0.3_(7B)-Conversational.ipynb @@ -1377,20 +1377,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if 
False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Mistral_v0.3_(7B)-GRPO.ipynb b/nb/Mistral_v0.3_(7B)-GRPO.ipynb index f6262821d..f1fffbff8 100644 --- a/nb/Mistral_v0.3_(7B)-GRPO.ipynb +++ b/nb/Mistral_v0.3_(7B)-GRPO.ipynb @@ -8650,20 +8650,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: 
model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Nemotron-3-Nano-30B-A3B_A100.ipynb b/nb/Nemotron-3-Nano-30B-A3B_A100.ipynb index a154d60ea..f361f3b55 100644 --- a/nb/Nemotron-3-Nano-30B-A3B_A100.ipynb +++ b/nb/Nemotron-3-Nano-30B-A3B_A100.ipynb @@ -1774,23 +1774,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " 
tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Nemotron-Nano-3-30B-A3B_A100.ipynb b/nb/Nemotron-Nano-3-30B-A3B_A100.ipynb index a154d60ea..f361f3b55 100644 --- a/nb/Nemotron-Nano-3-30B-A3B_A100.ipynb +++ b/nb/Nemotron-Nano-3-30B-A3B_A100.ipynb @@ -1774,23 +1774,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " 
tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game.ipynb b/nb/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game.ipynb index b0b782823..472463380 100644 --- a/nb/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game.ipynb +++ b/nb/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game.ipynb @@ -4101,15 +4101,15 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", "if False:\n", - " model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/nb/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.ipynb b/nb/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.ipynb index 0ccf73589..f15f1054a 100644 --- a/nb/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.ipynb +++ b/nb/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.ipynb @@ -2565,15 +2565,15 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", + " 
model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", "if False:\n", - " model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/nb/Orpheus_(3B)-TTS.ipynb b/nb/Orpheus_(3B)-TTS.ipynb index a68ad62cc..9687b45e7 100644 --- a/nb/Orpheus_(3B)-TTS.ipynb +++ b/nb/Orpheus_(3B)-TTS.ipynb @@ -2158,20 +2158,20 @@ ], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = 
\"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Oute_TTS_(1B).ipynb b/nb/Oute_TTS_(1B).ipynb index 21397e7ab..3a3413396 100644 --- a/nb/Oute_TTS_(1B).ipynb +++ b/nb/Oute_TTS_(1B).ipynb @@ -4563,20 +4563,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", 
token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Paddle_OCR_(1B)_Vision.ipynb b/nb/Paddle_OCR_(1B)_Vision.ipynb index a88a5fe05..3a078b6ce 100644 --- a/nb/Paddle_OCR_(1B)_Vision.ipynb +++ b/nb/Paddle_OCR_(1B)_Vision.ipynb @@ -1644,10 +1644,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Phi_3.5_Mini-Conversational.ipynb b/nb/Phi_3.5_Mini-Conversational.ipynb index 21bf0b271..bf1d0a835 100644 --- a/nb/Phi_3.5_Mini-Conversational.ipynb +++ b/nb/Phi_3.5_Mini-Conversational.ipynb @@ -1327,20 +1327,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: 
model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Phi_3_Medium-Conversational.ipynb b/nb/Phi_3_Medium-Conversational.ipynb index 4f263d80e..e8d492739 100644 --- a/nb/Phi_3_Medium-Conversational.ipynb +++ b/nb/Phi_3_Medium-Conversational.ipynb @@ -1449,20 +1449,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " 
tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Phi_4-Conversational.ipynb b/nb/Phi_4-Conversational.ipynb index 9e0978237..9ab77277b 100644 --- a/nb/Phi_4-Conversational.ipynb +++ b/nb/Phi_4-Conversational.ipynb @@ -1500,20 +1500,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Phi_4_(14B)-GRPO.ipynb 
b/nb/Phi_4_(14B)-GRPO.ipynb index 98c97e009..d48b3a4de 100644 --- a/nb/Phi_4_(14B)-GRPO.ipynb +++ b/nb/Phi_4_(14B)-GRPO.ipynb @@ -5322,20 +5322,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Pixtral_(12B)-Vision.ipynb b/nb/Pixtral_(12B)-Vision.ipynb index 010e35d1c..77881eaee 100644 --- a/nb/Pixtral_(12B)-Vision.ipynb +++ b/nb/Pixtral_(12B)-Vision.ipynb @@ -1238,10 +1238,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Qwen2.5_(3B)-GRPO.ipynb b/nb/Qwen2.5_(3B)-GRPO.ipynb index 350bddfb2..6d5f08012 100644 --- a/nb/Qwen2.5_(3B)-GRPO.ipynb +++ b/nb/Qwen2.5_(3B)-GRPO.ipynb @@ -8232,20 +8232,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " 
tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Qwen2.5_(7B)-Alpaca.ipynb b/nb/Qwen2.5_(7B)-Alpaca.ipynb index 41623a32a..6a272b948 100644 --- a/nb/Qwen2.5_(7B)-Alpaca.ipynb +++ b/nb/Qwen2.5_(7B)-Alpaca.ipynb @@ -1247,20 +1247,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Qwen2.5_Coder_(14B)-Conversational.ipynb b/nb/Qwen2.5_Coder_(14B)-Conversational.ipynb index 8c2f47570..e5c07cba7 100644 --- 
a/nb/Qwen2.5_Coder_(14B)-Conversational.ipynb +++ b/nb/Qwen2.5_Coder_(14B)-Conversational.ipynb @@ -1537,20 +1537,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Qwen2.5_VL_(7B)-Vision.ipynb b/nb/Qwen2.5_VL_(7B)-Vision.ipynb index d993ece6d..54301e209 100644 --- a/nb/Qwen2.5_VL_(7B)-Vision.ipynb +++ b/nb/Qwen2.5_VL_(7B)-Vision.ipynb @@ -1377,10 +1377,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Qwen2_(7B)-Alpaca.ipynb b/nb/Qwen2_(7B)-Alpaca.ipynb index 75d37d3a7..fd1cf9147 100644 --- a/nb/Qwen2_(7B)-Alpaca.ipynb +++ b/nb/Qwen2_(7B)-Alpaca.ipynb @@ -1312,20 +1312,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " 
tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Qwen2_5_7B_VL_GRPO.ipynb b/nb/Qwen2_5_7B_VL_GRPO.ipynb index 6a5e7e06f..7b82694c4 100644 --- a/nb/Qwen2_5_7B_VL_GRPO.ipynb +++ b/nb/Qwen2_5_7B_VL_GRPO.ipynb @@ -7006,20 +7006,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/nb/Qwen2_VL_(7B)-Vision.ipynb b/nb/Qwen2_VL_(7B)-Vision.ipynb index 38a4ae025..039a7245d 100644 --- a/nb/Qwen2_VL_(7B)-Vision.ipynb +++ 
b/nb/Qwen2_VL_(7B)-Vision.ipynb @@ -1366,10 +1366,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Qwen3_(14B)-Alpaca.ipynb b/nb/Qwen3_(14B)-Alpaca.ipynb index cb83b835e..0be5b516b 100644 --- a/nb/Qwen3_(14B)-Alpaca.ipynb +++ b/nb/Qwen3_(14B)-Alpaca.ipynb @@ -1413,20 +1413,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if 
False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Qwen3_(14B)-Reasoning-Conversational.ipynb b/nb/Qwen3_(14B)-Reasoning-Conversational.ipynb index 212596a0d..26f85cd5a 100644 --- a/nb/Qwen3_(14B)-Reasoning-Conversational.ipynb +++ b/nb/Qwen3_(14B)-Reasoning-Conversational.ipynb @@ -1647,23 +1647,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, 
{ diff --git a/nb/Qwen3_(14B).ipynb b/nb/Qwen3_(14B).ipynb index 61d6eb9c0..b31dbc3e5 100644 --- a/nb/Qwen3_(14B).ipynb +++ b/nb/Qwen3_(14B).ipynb @@ -1444,23 +1444,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Qwen3_(32B)_A100-Reasoning-Conversational.ipynb b/nb/Qwen3_(32B)_A100-Reasoning-Conversational.ipynb index ba914f292..e3ef2ab5d 100644 --- a/nb/Qwen3_(32B)_A100-Reasoning-Conversational.ipynb +++ b/nb/Qwen3_(32B)_A100-Reasoning-Conversational.ipynb @@ -1724,23 +1724,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " 
model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Qwen3_(4B)-GRPO.ipynb b/nb/Qwen3_(4B)-GRPO.ipynb index a75635ee1..9f88317b1 100644 --- a/nb/Qwen3_(4B)-GRPO.ipynb +++ b/nb/Qwen3_(4B)-GRPO.ipynb @@ -5625,20 +5625,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: 
model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Qwen3_(4B)-Instruct.ipynb b/nb/Qwen3_(4B)-Instruct.ipynb index 0aade6595..df7d806c7 100644 --- a/nb/Qwen3_(4B)-Instruct.ipynb +++ b/nb/Qwen3_(4B)-Instruct.ipynb @@ -1502,23 +1502,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if 
False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Qwen3_(4B)-Thinking.ipynb b/nb/Qwen3_(4B)-Thinking.ipynb index b210d0513..4f72b0ad6 100644 --- a/nb/Qwen3_(4B)-Thinking.ipynb +++ b/nb/Qwen3_(4B)-Thinking.ipynb @@ -1518,23 +1518,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " 
model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Qwen3_8B_FP8_GRPO.ipynb b/nb/Qwen3_8B_FP8_GRPO.ipynb index a3626a2db..28c2192a0 100644 --- a/nb/Qwen3_8B_FP8_GRPO.ipynb +++ b/nb/Qwen3_8B_FP8_GRPO.ipynb @@ -6652,20 +6652,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " 
model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Qwen3_VL_(8B)-Vision-GRPO.ipynb b/nb/Qwen3_VL_(8B)-Vision-GRPO.ipynb index 2af757833..d888ada88 100644 --- a/nb/Qwen3_VL_(8B)-Vision-GRPO.ipynb +++ b/nb/Qwen3_VL_(8B)-Vision-GRPO.ipynb @@ -4847,20 +4847,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/nb/Qwen3_VL_(8B)-Vision.ipynb b/nb/Qwen3_VL_(8B)-Vision.ipynb index 6f435467f..13c6b9574 100644 --- a/nb/Qwen3_VL_(8B)-Vision.ipynb +++ b/nb/Qwen3_VL_(8B)-Vision.ipynb @@ -1075,10 +1075,10 @@ 
"# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/nb/Sesame_CSM_(1B)-TTS.ipynb b/nb/Sesame_CSM_(1B)-TTS.ipynb index 2ac7d6c2a..b2993223a 100644 --- a/nb/Sesame_CSM_(1B)-TTS.ipynb +++ b/nb/Sesame_CSM_(1B)-TTS.ipynb @@ -918,19 +918,19 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", processor, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", processor, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", processor, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", processor, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", processor, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", processor, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", processor, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", processor, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", " processor.save_pretrained(\"model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", + " model.push_to_hub(\"hf/lora_model\", token 
= \"\")\n", " processor.push_to_hub(\"hf/model\", token = \"\")\n" ] }, diff --git a/nb/Spark_TTS_(0_5B).ipynb b/nb/Spark_TTS_(0_5B).ipynb index 08c48ff09..00c2410cc 100644 --- a/nb/Spark_TTS_(0_5B).ipynb +++ b/nb/Spark_TTS_(0_5B).ipynb @@ -1518,20 +1518,20 @@ ], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/TinyLlama_(1.1B)-Alpaca.ipynb b/nb/TinyLlama_(1.1B)-Alpaca.ipynb index a9d4f1125..2c3b802b3 100644 --- a/nb/TinyLlama_(1.1B)-Alpaca.ipynb +++ b/nb/TinyLlama_(1.1B)-Alpaca.ipynb @@ -2460,20 +2460,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: 
model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/Whisper.ipynb b/nb/Whisper.ipynb index fdabb98ac..44a2e40d1 100644 --- a/nb/Whisper.ipynb +++ b/nb/Whisper.ipynb @@ -1090,20 +1090,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = None,)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = None,)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = 
\"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/nb/gpt-oss-(120B)_A100-Fine-tuning.ipynb b/nb/gpt-oss-(120B)_A100-Fine-tuning.ipynb index 748f6f239..601c136b3 100644 --- a/nb/gpt-oss-(120B)_A100-Fine-tuning.ipynb +++ b/nb/gpt-oss-(120B)_A100-Fine-tuning.ipynb @@ -1693,21 +1693,21 @@ "source": [ "# Merge to mxfp 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"gpt-oss-finetune\", tokenizer, save_method = \"mxfp4\",)\n", + " model.save_pretrained_merged(\"gpt-oss-finetune-mxfp4\", tokenizer, save_method = \"mxfp4\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"mxfp4\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-mxfp4\", tokenizer, save_method = \"mxfp4\", token = \"\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, 
save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"gpt-oss-finetune\")\n", - " tokenizer.save_pretrained(\"gpt-oss-finetune\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/gpt-oss-finetune\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/gpt-oss-finetune\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/nb/gpt-oss-(20B)-Fine-tuning.ipynb b/nb/gpt-oss-(20B)-Fine-tuning.ipynb index c22bee081..95e82f703 100644 --- a/nb/gpt-oss-(20B)-Fine-tuning.ipynb +++ b/nb/gpt-oss-(20B)-Fine-tuning.ipynb @@ -1422,14 +1422,14 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", - "if False: model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", + "if False: model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/nb/gpt-oss-(20B)-GRPO.ipynb b/nb/gpt-oss-(20B)-GRPO.ipynb index 280d48a5b..f1e4b44e4 100644 --- 
a/nb/gpt-oss-(20B)-GRPO.ipynb +++ b/nb/gpt-oss-(20B)-GRPO.ipynb @@ -5813,14 +5813,14 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", - "if False: model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", + "if False: model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/nb/gpt-oss-(20B)_A100-GRPO.ipynb b/nb/gpt-oss-(20B)_A100-GRPO.ipynb index fe7772ccd..edd3bb39d 100644 --- a/nb/gpt-oss-(20B)_A100-GRPO.ipynb +++ b/nb/gpt-oss-(20B)_A100-GRPO.ipynb @@ -1693,21 +1693,21 @@ "source": [ "# Merge to mxfp 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"gpt-oss-finetune\", tokenizer, save_method = \"mxfp4\",)\n", + " model.save_pretrained_merged(\"gpt-oss-finetune-mxfp4\", tokenizer, save_method = \"mxfp4\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"mxfp4\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-mxfp4\", tokenizer, save_method = \"mxfp4\", token = \"\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False: # Pushing to HF Hub\n", - " 
model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"gpt-oss-finetune\")\n", - " tokenizer.save_pretrained(\"gpt-oss-finetune\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/gpt-oss-finetune\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/gpt-oss-finetune\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/nb/gpt_oss_(20B)_500K_Context_Fine_tuning.ipynb b/nb/gpt_oss_(20B)_500K_Context_Fine_tuning.ipynb index 9a5df5e85..f90738755 100644 --- a/nb/gpt_oss_(20B)_500K_Context_Fine_tuning.ipynb +++ b/nb/gpt_oss_(20B)_500K_Context_Fine_tuning.ipynb @@ -914,14 +914,14 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", - "if False: model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", + "if False: model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " 
model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/nb/gpt_oss_(20B)_GRPO_BF16.ipynb b/nb/gpt_oss_(20B)_GRPO_BF16.ipynb index a5f7ddb83..1f2391495 100644 --- a/nb/gpt_oss_(20B)_GRPO_BF16.ipynb +++ b/nb/gpt_oss_(20B)_GRPO_BF16.ipynb @@ -6217,14 +6217,14 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", - "if False: model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", + "if False: model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/nb/gpt_oss_(20B)_Reinforcement_Learning_2048_Game.ipynb b/nb/gpt_oss_(20B)_Reinforcement_Learning_2048_Game.ipynb index c67677acc..20da633fc 100644 --- a/nb/gpt_oss_(20B)_Reinforcement_Learning_2048_Game.ipynb +++ b/nb/gpt_oss_(20B)_Reinforcement_Learning_2048_Game.ipynb @@ -5264,15 +5264,15 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", "if False:\n", - " 
model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/nb/gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.ipynb b/nb/gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.ipynb index 8b3bf0762..8678cb000 100644 --- a/nb/gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.ipynb +++ b/nb/gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.ipynb @@ -12097,15 +12097,15 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", "if False:\n", - " model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = 
\"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/nb/gpt_oss_(20B)_Reinforcement_Learning_2048_Game_DGX_Spark.ipynb b/nb/gpt_oss_(20B)_Reinforcement_Learning_2048_Game_DGX_Spark.ipynb index a5ab441b8..9d9e948aa 100644 --- a/nb/gpt_oss_(20B)_Reinforcement_Learning_2048_Game_DGX_Spark.ipynb +++ b/nb/gpt_oss_(20B)_Reinforcement_Learning_2048_Game_DGX_Spark.ipynb @@ -5250,15 +5250,15 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", "if False:\n", - " model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/nb/nemo_gym_sudoku.ipynb b/nb/nemo_gym_sudoku.ipynb index 6facc7f96..01be43bb7 100644 --- a/nb/nemo_gym_sudoku.ipynb +++ b/nb/nemo_gym_sudoku.ipynb @@ -556,14 +556,14 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", - "if False: model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = 
\"hf...\", save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", + "if False: model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/original_template/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb b/original_template/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb index 12f5a1409..526f72264 100644 --- a/original_template/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb +++ b/original_template/Advanced_Llama3_1_(3B)_GRPO_LoRA.ipynb @@ -9656,20 +9656,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: 
model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb b/original_template/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb index b5016295d..1d6e7d94f 100644 --- a/original_template/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb +++ b/original_template/Advanced_Llama3_2_(3B)_GRPO_LoRA.ipynb @@ -12668,20 +12668,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " 
tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.ipynb b/original_template/CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.ipynb index a0fd12a58..f0b5947fc 100644 --- a/original_template/CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.ipynb +++ b/original_template/CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.ipynb @@ -1448,20 +1448,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " 
model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/CodeGemma_(7B)-Conversational.ipynb b/original_template/CodeGemma_(7B)-Conversational.ipynb index 3122af56a..658b59871 100644 --- a/original_template/CodeGemma_(7B)-Conversational.ipynb +++ b/original_template/CodeGemma_(7B)-Conversational.ipynb @@ -801,20 +801,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git 
a/original_template/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb b/original_template/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb index 0f36aad27..6ff0cc1d7 100644 --- a/original_template/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb +++ b/original_template/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.ipynb @@ -5001,20 +5001,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Deepseek_OCR_(3B)-Eval.ipynb b/original_template/Deepseek_OCR_(3B)-Eval.ipynb index 19960ec50..c709bfecb 100644 --- a/original_template/Deepseek_OCR_(3B)-Eval.ipynb +++ b/original_template/Deepseek_OCR_(3B)-Eval.ipynb 
@@ -1624,10 +1624,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/original_template/Deepseek_OCR_(3B)-Evaluation.ipynb b/original_template/Deepseek_OCR_(3B)-Evaluation.ipynb index 094cea2ad..c74f5d852 100644 --- a/original_template/Deepseek_OCR_(3B)-Evaluation.ipynb +++ b/original_template/Deepseek_OCR_(3B)-Evaluation.ipynb @@ -1938,10 +1938,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/original_template/Deepseek_OCR_(3B).ipynb b/original_template/Deepseek_OCR_(3B).ipynb index db108f3c2..cb7011dfd 100644 --- a/original_template/Deepseek_OCR_(3B).ipynb +++ b/original_template/Deepseek_OCR_(3B).ipynb @@ -1624,10 +1624,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] } ], diff --git a/original_template/ERNIE_4_5_21B_A3B_PT-Conversational.ipynb b/original_template/ERNIE_4_5_21B_A3B_PT-Conversational.ipynb index 965cd9450..583fdba5a 100644 --- a/original_template/ERNIE_4_5_21B_A3B_PT-Conversational.ipynb +++ b/original_template/ERNIE_4_5_21B_A3B_PT-Conversational.ipynb @@ -1669,23 +1669,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", 
+ " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/ERNIE_4_5_VL_28B_A3B_PT_Vision.ipynb b/original_template/ERNIE_4_5_VL_28B_A3B_PT_Vision.ipynb index 82a496c4b..bad9908c7 100644 --- a/original_template/ERNIE_4_5_VL_28B_A3B_PT_Vision.ipynb +++ b/original_template/ERNIE_4_5_VL_28B_A3B_PT_Vision.ipynb @@ -2066,10 +2066,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] } ], diff --git a/original_template/Falcon_H1_(0.5B)-Alpaca.ipynb b/original_template/Falcon_H1_(0.5B)-Alpaca.ipynb index 8ec9a49a5..f5fccae27 100644 --- a/original_template/Falcon_H1_(0.5B)-Alpaca.ipynb +++ b/original_template/Falcon_H1_(0.5B)-Alpaca.ipynb @@ -982,20 +982,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: 
model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Gemma2_(2B)-Alpaca.ipynb b/original_template/Gemma2_(2B)-Alpaca.ipynb index 9d57c5072..78a8be33e 100644 --- a/original_template/Gemma2_(2B)-Alpaca.ipynb +++ b/original_template/Gemma2_(2B)-Alpaca.ipynb @@ -1174,20 +1174,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = 
\"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Gemma2_(9B)-Alpaca.ipynb b/original_template/Gemma2_(9B)-Alpaca.ipynb index f6899e709..f1f493a4f 100644 --- a/original_template/Gemma2_(9B)-Alpaca.ipynb +++ b/original_template/Gemma2_(9B)-Alpaca.ipynb @@ -1201,20 +1201,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " 
model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Gemma3N_(4B)-Audio.ipynb b/original_template/Gemma3N_(4B)-Audio.ipynb index b9599ecaa..eb99c38cd 100644 --- a/original_template/Gemma3N_(4B)-Audio.ipynb +++ b/original_template/Gemma3N_(4B)-Audio.ipynb @@ -1543,7 +1543,7 @@ "outputs": [], "source": [ "if True: # Change to True to save finetune!\n", - " model.save_pretrained_merged(\"gemma-3n\", processor)" + " model.save_pretrained_merged(\"gemma-3n-merged\", processor)" ] }, { diff --git a/original_template/Gemma3N_(4B)-Conversational.ipynb b/original_template/Gemma3N_(4B)-Conversational.ipynb index 6b5dc08a7..6e2265c09 100644 --- a/original_template/Gemma3N_(4B)-Conversational.ipynb +++ b/original_template/Gemma3N_(4B)-Conversational.ipynb @@ -1940,7 +1940,7 @@ "outputs": [], "source": [ "if False: # Change to True to save finetune!\n", - " model.save_pretrained_merged(\"gemma-3N-finetune\", tokenizer)" + " model.save_pretrained_merged(\"gemma-3N-finetune-merged\", tokenizer)" ] }, { diff --git a/original_template/Gemma3N_(4B)-Vision.ipynb b/original_template/Gemma3N_(4B)-Vision.ipynb index d3a62b70e..e4e633bef 100644 --- a/original_template/Gemma3N_(4B)-Vision.ipynb +++ b/original_template/Gemma3N_(4B)-Vision.ipynb @@ -1458,10 +1458,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", processor,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", processor,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", processor, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", processor, token = \"PUT_HERE\")" ] }, { diff --git a/original_template/Gemma3_(1B)-GRPO.ipynb b/original_template/Gemma3_(1B)-GRPO.ipynb index ee0615ba7..f369182bb 100644 --- a/original_template/Gemma3_(1B)-GRPO.ipynb +++ b/original_template/Gemma3_(1B)-GRPO.ipynb @@ -1836,7 +1836,7 @@ "outputs": [], "source": [ "if False: # Change to True to save finetune!\n", - " model.save_pretrained_merged(\"gemma-3-finetune\", tokenizer)" + " model.save_pretrained_merged(\"gemma-3-finetune-merged\", tokenizer)" ] }, { diff --git a/original_template/Gemma3_(270M).ipynb b/original_template/Gemma3_(270M).ipynb index e8703784e..0e34c2871 100644 --- a/original_template/Gemma3_(270M).ipynb +++ b/original_template/Gemma3_(270M).ipynb @@ -1644,23 +1644,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"gemma-3-finetune\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"gemma-3-finetune-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gemma-3-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gemma-3-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"gemma-3-finetune\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"gemma-3-finetune-merged-4bit\", tokenizer, save_method = 
\"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gemma-3-finetune\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gemma-3-finetune-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"gemma-3-finetune\")\n", - " tokenizer.save_pretrained(\"gemma-3-finetune\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/gemma-3-finetune\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/gemma-3-finetune\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Gemma3_(27B)_A100-Conversational.ipynb b/original_template/Gemma3_(27B)_A100-Conversational.ipynb index f2b20c284..ed05de905 100644 --- a/original_template/Gemma3_(27B)_A100-Conversational.ipynb +++ b/original_template/Gemma3_(27B)_A100-Conversational.ipynb @@ -985,7 +985,7 @@ "outputs": [], "source": [ "if False: # Change to True to save finetune!\n", - " model.save_pretrained_merged(\"gemma-3-finetune\", tokenizer)" + " model.save_pretrained_merged(\"gemma-3-finetune-merged\", tokenizer)" ] }, { diff --git a/original_template/Gemma3_(4B)-Vision-GRPO.ipynb b/original_template/Gemma3_(4B)-Vision-GRPO.ipynb index 8cc148681..16ef77d6f 100644 --- a/original_template/Gemma3_(4B)-Vision-GRPO.ipynb +++ b/original_template/Gemma3_(4B)-Vision-GRPO.ipynb @@ -1928,10 +1928,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] } ], diff --git a/original_template/Gemma3_(4B)-Vision.ipynb b/original_template/Gemma3_(4B)-Vision.ipynb index a93fd6659..b15fc0713 100644 --- a/original_template/Gemma3_(4B)-Vision.ipynb +++ b/original_template/Gemma3_(4B)-Vision.ipynb @@ -1427,10 +1427,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", processor,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", processor,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", processor, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", processor, token = \"PUT_HERE\")" ] } ], diff --git a/original_template/Gemma3_(4B).ipynb b/original_template/Gemma3_(4B).ipynb index b0443b692..a6e3edf8f 100644 --- a/original_template/Gemma3_(4B).ipynb +++ b/original_template/Gemma3_(4B).ipynb @@ -1213,7 +1213,7 @@ "outputs": [], "source": [ "if False: # Change to True to save finetune!\n", - " model.save_pretrained_merged(\"gemma-3-finetune\", tokenizer)" + " model.save_pretrained_merged(\"gemma-3-finetune-merged\", tokenizer)" ] }, { diff --git a/original_template/Granite4.0.ipynb b/original_template/Granite4.0.ipynb index 5a525a68e..580f3079f 100644 --- a/original_template/Granite4.0.ipynb +++ b/original_template/Granite4.0.ipynb @@ -1416,23 +1416,23 @@ 
"source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Granite4.0_350M.ipynb b/original_template/Granite4.0_350M.ipynb index 65502d32b..7ed8cbb42 100644 --- a/original_template/Granite4.0_350M.ipynb +++ b/original_template/Granite4.0_350M.ipynb @@ -1595,23 +1595,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF 
Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Liquid_LFM2_(1.2B)-Conversational.ipynb b/original_template/Liquid_LFM2_(1.2B)-Conversational.ipynb index 65aee86fe..3c5ae0591 100644 --- a/original_template/Liquid_LFM2_(1.2B)-Conversational.ipynb +++ b/original_template/Liquid_LFM2_(1.2B)-Conversational.ipynb @@ -1516,20 +1516,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = 
\"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/original_template/Llama3.1_(8B)-Alpaca.ipynb b/original_template/Llama3.1_(8B)-Alpaca.ipynb index f68166746..9d4ba8bd0 100644 --- a/original_template/Llama3.1_(8B)-Alpaca.ipynb +++ b/original_template/Llama3.1_(8B)-Alpaca.ipynb @@ -1142,20 +1142,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: 
model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Llama3.1_(8B)-GRPO.ipynb b/original_template/Llama3.1_(8B)-GRPO.ipynb index 38b2317a1..afe66a5c8 100644 --- a/original_template/Llama3.1_(8B)-GRPO.ipynb +++ b/original_template/Llama3.1_(8B)-GRPO.ipynb @@ -9332,20 +9332,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " 
model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Llama3.2_(11B)-Vision.ipynb b/original_template/Llama3.2_(11B)-Vision.ipynb index 654595f75..b5741a401 100644 --- a/original_template/Llama3.2_(11B)-Vision.ipynb +++ b/original_template/Llama3.2_(11B)-Vision.ipynb @@ -1331,10 +1331,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] } ], diff --git a/original_template/Llama3.2_(1B)-RAFT.ipynb b/original_template/Llama3.2_(1B)-RAFT.ipynb index 0cf923544..73d32ad07 100644 --- a/original_template/Llama3.2_(1B)-RAFT.ipynb +++ b/original_template/Llama3.2_(1B)-RAFT.ipynb @@ -1332,20 +1332,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# 
Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Llama3.2_(1B_and_3B)-Conversational.ipynb b/original_template/Llama3.2_(1B_and_3B)-Conversational.ipynb index 19e14b547..eef0357b1 100644 --- a/original_template/Llama3.2_(1B_and_3B)-Conversational.ipynb +++ b/original_template/Llama3.2_(1B_and_3B)-Conversational.ipynb @@ -1280,20 +1280,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = 
\"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Llama3.3_(70B)_A100-Conversational.ipynb b/original_template/Llama3.3_(70B)_A100-Conversational.ipynb index fa4812c64..d3c1feef1 100644 --- a/original_template/Llama3.3_(70B)_A100-Conversational.ipynb +++ b/original_template/Llama3.3_(70B)_A100-Conversational.ipynb @@ -1676,20 +1676,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", 
token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Llama3_(8B)-Alpaca.ipynb b/original_template/Llama3_(8B)-Alpaca.ipynb index 3bff4bdd2..16a250748 100644 --- a/original_template/Llama3_(8B)-Alpaca.ipynb +++ b/original_template/Llama3_(8B)-Alpaca.ipynb @@ -1195,20 +1195,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " 
model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Llama3_(8B)-Conversational.ipynb b/original_template/Llama3_(8B)-Conversational.ipynb index a83da4d84..f3be6fbc1 100644 --- a/original_template/Llama3_(8B)-Conversational.ipynb +++ b/original_template/Llama3_(8B)-Conversational.ipynb @@ -1295,20 +1295,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git 
a/original_template/Llama3_(8B)-ORPO.ipynb b/original_template/Llama3_(8B)-ORPO.ipynb index 62bf6503a..c8ae0b8b8 100644 --- a/original_template/Llama3_(8B)-ORPO.ipynb +++ b/original_template/Llama3_(8B)-ORPO.ipynb @@ -1360,20 +1360,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Llama_FP8_GRPO.ipynb b/original_template/Llama_FP8_GRPO.ipynb index 4c4bb7927..a885bd766 100644 --- a/original_template/Llama_FP8_GRPO.ipynb +++ b/original_template/Llama_FP8_GRPO.ipynb @@ -6524,20 +6524,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: 
model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Llasa_TTS_(1B).ipynb b/original_template/Llasa_TTS_(1B).ipynb index 4bb277841..0b65ad466 100644 --- a/original_template/Llasa_TTS_(1B).ipynb +++ b/original_template/Llasa_TTS_(1B).ipynb @@ -1512,20 +1512,20 @@ ], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: 
model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] } ], diff --git a/original_template/Llasa_TTS_(3B).ipynb b/original_template/Llasa_TTS_(3B).ipynb index 7847a51a2..0fe8d9ab9 100644 --- a/original_template/Llasa_TTS_(3B).ipynb +++ b/original_template/Llasa_TTS_(3B).ipynb @@ -1504,20 +1504,20 @@ ], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = 
\"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] } ], diff --git a/original_template/Magistral_(24B)-Reasoning-Conversational.ipynb b/original_template/Magistral_(24B)-Reasoning-Conversational.ipynb index f04bc5928..77838cdf2 100644 --- a/original_template/Magistral_(24B)-Reasoning-Conversational.ipynb +++ b/original_template/Magistral_(24B)-Reasoning-Conversational.ipynb @@ -1144,23 +1144,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " 
model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Meta-Synthetic-Data-Llama3.1_(8B).ipynb b/original_template/Meta-Synthetic-Data-Llama3.1_(8B).ipynb index a6ea6ca4c..a9c2a67ba 100644 --- a/original_template/Meta-Synthetic-Data-Llama3.1_(8B).ipynb +++ b/original_template/Meta-Synthetic-Data-Llama3.1_(8B).ipynb @@ -1724,20 +1724,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " 
tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Meta_Synthetic_Data_Llama3_2_(3B).ipynb b/original_template/Meta_Synthetic_Data_Llama3_2_(3B).ipynb index 64f40c9e0..488fb41b2 100644 --- a/original_template/Meta_Synthetic_Data_Llama3_2_(3B).ipynb +++ b/original_template/Meta_Synthetic_Data_Llama3_2_(3B).ipynb @@ -1651,23 +1651,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Change to True to upload finetune\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Change to True to upload finetune\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Change to True to upload finetune\n", 
- " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.ipynb b/original_template/Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.ipynb index fe964ef15..cbe07d78c 100644 --- a/original_template/Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.ipynb +++ b/original_template/Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.ipynb @@ -10925,20 +10925,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = 
\"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Ministral_3_VL_(3B)_Vision.ipynb b/original_template/Ministral_3_VL_(3B)_Vision.ipynb index 45af212b6..c8ed6fa69 100644 --- a/original_template/Ministral_3_VL_(3B)_Vision.ipynb +++ b/original_template/Ministral_3_VL_(3B)_Vision.ipynb @@ -1405,10 +1405,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/original_template/Mistral_Nemo_(12B)-Alpaca.ipynb b/original_template/Mistral_Nemo_(12B)-Alpaca.ipynb index e194e5f4e..b720675cd 100644 --- a/original_template/Mistral_Nemo_(12B)-Alpaca.ipynb +++ b/original_template/Mistral_Nemo_(12B)-Alpaca.ipynb @@ -1308,20 +1308,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", 
tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Mistral_Small_(22B)-Alpaca.ipynb b/original_template/Mistral_Small_(22B)-Alpaca.ipynb index fcde28ff3..487a4e7d4 100644 --- a/original_template/Mistral_Small_(22B)-Alpaca.ipynb +++ b/original_template/Mistral_Small_(22B)-Alpaca.ipynb @@ -1294,20 +1294,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - 
" tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Mistral_v0.3_(7B)-Alpaca.ipynb b/original_template/Mistral_v0.3_(7B)-Alpaca.ipynb index 6202fb3fd..fdfb4700c 100644 --- a/original_template/Mistral_v0.3_(7B)-Alpaca.ipynb +++ b/original_template/Mistral_v0.3_(7B)-Alpaca.ipynb @@ -1231,20 +1231,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " 
model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Mistral_v0.3_(7B)-CPT.ipynb b/original_template/Mistral_v0.3_(7B)-CPT.ipynb index cb4d0e9ae..f8ea9f25b 100644 --- a/original_template/Mistral_v0.3_(7B)-CPT.ipynb +++ b/original_template/Mistral_v0.3_(7B)-CPT.ipynb @@ -696,20 +696,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Mistral_v0.3_(7B)-Conversational.ipynb b/original_template/Mistral_v0.3_(7B)-Conversational.ipynb index b7ba82f9c..7ec9c35e4 
100644 --- a/original_template/Mistral_v0.3_(7B)-Conversational.ipynb +++ b/original_template/Mistral_v0.3_(7B)-Conversational.ipynb @@ -1354,20 +1354,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Mistral_v0.3_(7B)-GRPO.ipynb b/original_template/Mistral_v0.3_(7B)-GRPO.ipynb index e8e8d23ac..f155cae0a 100644 --- a/original_template/Mistral_v0.3_(7B)-GRPO.ipynb +++ b/original_template/Mistral_v0.3_(7B)-GRPO.ipynb @@ -8629,20 +8629,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, 
save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Nemotron-3-Nano-30B-A3B_A100.ipynb b/original_template/Nemotron-3-Nano-30B-A3B_A100.ipynb index be90b8bb0..475d1dab2 100644 --- a/original_template/Nemotron-3-Nano-30B-A3B_A100.ipynb +++ b/original_template/Nemotron-3-Nano-30B-A3B_A100.ipynb @@ -1774,23 +1774,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token 
= \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Nemotron-Nano-3-30B-A3B_A100.ipynb b/original_template/Nemotron-Nano-3-30B-A3B_A100.ipynb index be90b8bb0..475d1dab2 100644 --- a/original_template/Nemotron-Nano-3-30B-A3B_A100.ipynb +++ b/original_template/Nemotron-Nano-3-30B-A3B_A100.ipynb @@ -1774,23 +1774,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, 
save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Orpheus_(3B)-TTS.ipynb b/original_template/Orpheus_(3B)-TTS.ipynb index 6b0bbef25..eb9e4a013 100644 --- a/original_template/Orpheus_(3B)-TTS.ipynb +++ b/original_template/Orpheus_(3B)-TTS.ipynb @@ -2135,20 +2135,20 @@ ], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: 
model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] } ], diff --git a/original_template/Oute_TTS_(1B).ipynb b/original_template/Oute_TTS_(1B).ipynb index 0fea6ca4a..3c9b96e6c 100644 --- a/original_template/Oute_TTS_(1B).ipynb +++ b/original_template/Oute_TTS_(1B).ipynb @@ -4577,20 +4577,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " 
tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Paddle_OCR_(1B)_Vision.ipynb b/original_template/Paddle_OCR_(1B)_Vision.ipynb index 23c3da9fa..29c211512 100644 --- a/original_template/Paddle_OCR_(1B)_Vision.ipynb +++ b/original_template/Paddle_OCR_(1B)_Vision.ipynb @@ -1621,10 +1621,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] } ], diff --git a/original_template/Phi_3.5_Mini-Conversational.ipynb b/original_template/Phi_3.5_Mini-Conversational.ipynb index ef797979d..60d55fc71 100644 --- a/original_template/Phi_3.5_Mini-Conversational.ipynb +++ b/original_template/Phi_3.5_Mini-Conversational.ipynb @@ -1304,20 +1304,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, 
save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Phi_3_Medium-Conversational.ipynb b/original_template/Phi_3_Medium-Conversational.ipynb index f6d103447..5a9463b3b 100644 --- a/original_template/Phi_3_Medium-Conversational.ipynb +++ b/original_template/Phi_3_Medium-Conversational.ipynb @@ -1426,20 +1426,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = 
\"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Phi_4-Conversational.ipynb b/original_template/Phi_4-Conversational.ipynb index 6039fb2e8..2272799f9 100644 --- a/original_template/Phi_4-Conversational.ipynb +++ b/original_template/Phi_4-Conversational.ipynb @@ -1477,20 +1477,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " 
model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Phi_4_(14B)-GRPO.ipynb b/original_template/Phi_4_(14B)-GRPO.ipynb index 0346fb2dc..9c09e8ec1 100644 --- a/original_template/Phi_4_(14B)-GRPO.ipynb +++ b/original_template/Phi_4_(14B)-GRPO.ipynb @@ -5301,20 +5301,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " 
tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Pixtral_(12B)-Vision.ipynb b/original_template/Pixtral_(12B)-Vision.ipynb index f3a973471..6c67a7dd2 100644 --- a/original_template/Pixtral_(12B)-Vision.ipynb +++ b/original_template/Pixtral_(12B)-Vision.ipynb @@ -1215,10 +1215,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] } ], diff --git a/original_template/Qwen2.5_(3B)-GRPO.ipynb b/original_template/Qwen2.5_(3B)-GRPO.ipynb index a1c580943..55478401a 100644 --- a/original_template/Qwen2.5_(3B)-GRPO.ipynb +++ b/original_template/Qwen2.5_(3B)-GRPO.ipynb @@ -8211,20 +8211,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: 
model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Qwen2.5_(7B)-Alpaca.ipynb b/original_template/Qwen2.5_(7B)-Alpaca.ipynb index d72caaee5..254dccf3d 100644 --- a/original_template/Qwen2.5_(7B)-Alpaca.ipynb +++ b/original_template/Qwen2.5_(7B)-Alpaca.ipynb @@ -1224,20 +1224,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", 
+ " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Qwen2.5_Coder_(14B)-Conversational.ipynb b/original_template/Qwen2.5_Coder_(14B)-Conversational.ipynb index a6acab4cd..5983f6eae 100644 --- a/original_template/Qwen2.5_Coder_(14B)-Conversational.ipynb +++ b/original_template/Qwen2.5_Coder_(14B)-Conversational.ipynb @@ -1514,20 +1514,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = 
\"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Qwen2.5_VL_(7B)-Vision.ipynb b/original_template/Qwen2.5_VL_(7B)-Vision.ipynb index 66476aea5..5d3d9bd73 100644 --- a/original_template/Qwen2.5_VL_(7B)-Vision.ipynb +++ b/original_template/Qwen2.5_VL_(7B)-Vision.ipynb @@ -1354,10 +1354,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] } ], diff --git a/original_template/Qwen2_(7B)-Alpaca.ipynb b/original_template/Qwen2_(7B)-Alpaca.ipynb index 21fdf37be..6bf046206 100644 --- a/original_template/Qwen2_(7B)-Alpaca.ipynb +++ b/original_template/Qwen2_(7B)-Alpaca.ipynb @@ -1289,20 +1289,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if 
False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Qwen2_5_7B_VL_GRPO.ipynb b/original_template/Qwen2_5_7B_VL_GRPO.ipynb index 812e1d4db..909194b2a 100644 --- a/original_template/Qwen2_5_7B_VL_GRPO.ipynb +++ b/original_template/Qwen2_5_7B_VL_GRPO.ipynb @@ -6985,20 +6985,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " 
model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/original_template/Qwen2_VL_(7B)-Vision.ipynb b/original_template/Qwen2_VL_(7B)-Vision.ipynb index e7226d90a..a66b1d727 100644 --- a/original_template/Qwen2_VL_(7B)-Vision.ipynb +++ b/original_template/Qwen2_VL_(7B)-Vision.ipynb @@ -1343,10 +1343,10 @@ "# Select ONLY 1 to save! (Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] } ], diff --git a/original_template/Qwen3_(14B)-Alpaca.ipynb b/original_template/Qwen3_(14B)-Alpaca.ipynb index afe831b5d..0de5330da 100644 --- a/original_template/Qwen3_(14B)-Alpaca.ipynb +++ b/original_template/Qwen3_(14B)-Alpaca.ipynb @@ -1390,20 +1390,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method 
= \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Qwen3_(14B)-Reasoning-Conversational.ipynb b/original_template/Qwen3_(14B)-Reasoning-Conversational.ipynb index 891eb69bb..ea8e166a9 100644 --- a/original_template/Qwen3_(14B)-Reasoning-Conversational.ipynb +++ b/original_template/Qwen3_(14B)-Reasoning-Conversational.ipynb @@ -1624,23 +1624,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, 
save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Qwen3_(14B).ipynb b/original_template/Qwen3_(14B).ipynb index dc1e94b87..0ba7e3b52 100644 --- a/original_template/Qwen3_(14B).ipynb +++ b/original_template/Qwen3_(14B).ipynb @@ -1421,23 +1421,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " 
tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Qwen3_(32B)_A100-Reasoning-Conversational.ipynb b/original_template/Qwen3_(32B)_A100-Reasoning-Conversational.ipynb index 063b05e1c..4cf759238 100644 --- a/original_template/Qwen3_(32B)_A100-Reasoning-Conversational.ipynb +++ b/original_template/Qwen3_(32B)_A100-Reasoning-Conversational.ipynb @@ -1701,23 +1701,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF 
Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Qwen3_(4B)-GRPO.ipynb b/original_template/Qwen3_(4B)-GRPO.ipynb index 2971f4975..9040e57fc 100644 --- a/original_template/Qwen3_(4B)-GRPO.ipynb +++ b/original_template/Qwen3_(4B)-GRPO.ipynb @@ -5604,20 +5604,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Qwen3_(4B)-Instruct.ipynb 
b/original_template/Qwen3_(4B)-Instruct.ipynb index a38ccff9e..6f3da0ac2 100644 --- a/original_template/Qwen3_(4B)-Instruct.ipynb +++ b/original_template/Qwen3_(4B)-Instruct.ipynb @@ -1479,23 +1479,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Qwen3_(4B)-Thinking.ipynb b/original_template/Qwen3_(4B)-Thinking.ipynb index a34a29186..3b17bd67d 100644 --- a/original_template/Qwen3_(4B)-Thinking.ipynb +++ b/original_template/Qwen3_(4B)-Thinking.ipynb @@ -1495,23 +1495,23 @@ "source": [ "# Merge to 16bit\n", "if False:\n", - " 
model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", + " model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", + " model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Qwen3_8B_FP8_GRPO.ipynb b/original_template/Qwen3_8B_FP8_GRPO.ipynb index 0accd2621..4fcb7b0e0 100644 --- a/original_template/Qwen3_8B_FP8_GRPO.ipynb +++ b/original_template/Qwen3_8B_FP8_GRPO.ipynb @@ -6581,20 +6581,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: 
model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Qwen3_VL_(8B)-Vision-GRPO.ipynb b/original_template/Qwen3_VL_(8B)-Vision-GRPO.ipynb index 67aac49da..e4a683188 100644 --- a/original_template/Qwen3_VL_(8B)-Vision-GRPO.ipynb +++ b/original_template/Qwen3_VL_(8B)-Vision-GRPO.ipynb @@ -4833,20 +4833,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: 
model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/original_template/Qwen3_VL_(8B)-Vision.ipynb b/original_template/Qwen3_VL_(8B)-Vision.ipynb index cc074d093..cb0602ac2 100644 --- a/original_template/Qwen3_VL_(8B)-Vision.ipynb +++ b/original_template/Qwen3_VL_(8B)-Vision.ipynb @@ -1052,10 +1052,10 @@ "# Select ONLY 1 to save! 
(Both not needed!)\n", "\n", "# Save locally to 16bit\n", - "if False: model.save_pretrained_merged(\"unsloth_finetune\", tokenizer,)\n", + "if False: model.save_pretrained_merged(\"unsloth_finetune-merged\", tokenizer,)\n", "\n", "# To export and save to your Hugging Face account\n", - "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune\", tokenizer, token = \"PUT_HERE\")" + "if False: model.push_to_hub_merged(\"YOUR_USERNAME/unsloth_finetune-merged\", tokenizer, token = \"PUT_HERE\")" ] }, { diff --git a/original_template/Sesame_CSM_(1B)-TTS.ipynb b/original_template/Sesame_CSM_(1B)-TTS.ipynb index 11bcd1f84..1ad73742c 100644 --- a/original_template/Sesame_CSM_(1B)-TTS.ipynb +++ b/original_template/Sesame_CSM_(1B)-TTS.ipynb @@ -895,19 +895,19 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", processor, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", processor, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", processor, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", processor, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", processor, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", processor, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", processor, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", processor, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", - " processor.save_pretrained(\"model\")\n", + " processor.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", + "
model.push_to_hub(\"hf/lora_model\", token = \"\")\n", - " processor.push_to_hub(\"hf/model\", token = \"\")\n" + " processor.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] } diff --git a/original_template/Spark_TTS_(0_5B).ipynb b/original_template/Spark_TTS_(0_5B).ipynb index 61f4cada2..4e9108580 100644 --- a/original_template/Spark_TTS_(0_5B).ipynb +++ b/original_template/Spark_TTS_(0_5B).ipynb @@ -1495,20 +1495,20 @@ ], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] } ], diff --git a/original_template/TinyLlama_(1.1B)-Alpaca.ipynb b/original_template/TinyLlama_(1.1B)-Alpaca.ipynb index bc247815c..714b94a35 100644 --- a/original_template/TinyLlama_(1.1B)-Alpaca.ipynb
+++ b/original_template/TinyLlama_(1.1B)-Alpaca.ipynb @@ -2437,20 +2437,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = \"merged_16bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/Whisper.ipynb b/original_template/Whisper.ipynb index 28f68fe60..cd5231fd5 100644 --- a/original_template/Whisper.ipynb +++ b/original_template/Whisper.ipynb @@ -1101,20 +1101,20 @@ "outputs": [], "source": [ "# Merge to 16bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = None,)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + "if 
False: model.save_pretrained_merged(\"model-merged\", tokenizer, save_method = None,)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Merge to 4bit\n", - "if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n", - "if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", + "if False: model.save_pretrained_merged(\"model-merged-4bit\", tokenizer, save_method = \"merged_4bit\",)\n", + "if False: model.push_to_hub_merged(\"hf/model-merged-4bit\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"model\")\n", - " tokenizer.save_pretrained(\"model\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False:\n", - " model.push_to_hub(\"hf/model\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/model\", token = \"\")\n" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")\n" ] }, { diff --git a/original_template/gpt-oss-(120B)_A100-Fine-tuning.ipynb b/original_template/gpt-oss-(120B)_A100-Fine-tuning.ipynb index 3dc6aaeca..0287f63a8 100644 --- a/original_template/gpt-oss-(120B)_A100-Fine-tuning.ipynb +++ b/original_template/gpt-oss-(120B)_A100-Fine-tuning.ipynb @@ -1670,21 +1670,21 @@ "source": [ "# Merge to mxfp 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"gpt-oss-finetune\", tokenizer, save_method = \"mxfp4\",)\n", + " model.save_pretrained_merged(\"gpt-oss-finetune-mxfp4\", tokenizer, save_method = \"mxfp4\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"mxfp4\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-mxfp4\", tokenizer, save_method = \"mxfp4\", token = \"\")\n", "\n", "# Merge 
and push to hub in 16bit\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"gpt-oss-finetune\")\n", - " tokenizer.save_pretrained(\"gpt-oss-finetune\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/gpt-oss-finetune\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/gpt-oss-finetune\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/original_template/gpt-oss-(20B)-Fine-tuning.ipynb b/original_template/gpt-oss-(20B)-Fine-tuning.ipynb index efeb3fd09..788bf2df4 100644 --- a/original_template/gpt-oss-(20B)-Fine-tuning.ipynb +++ b/original_template/gpt-oss-(20B)-Fine-tuning.ipynb @@ -1399,14 +1399,14 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", - "if False: model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", + "if False: model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " 
model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/original_template/gpt-oss-(20B)-GRPO.ipynb b/original_template/gpt-oss-(20B)-GRPO.ipynb index 690bdc0df..3774fa66a 100644 --- a/original_template/gpt-oss-(20B)-GRPO.ipynb +++ b/original_template/gpt-oss-(20B)-GRPO.ipynb @@ -5828,14 +5828,14 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", - "if False: model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", + "if False: model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/original_template/gpt-oss-(20B)_A100-GRPO.ipynb b/original_template/gpt-oss-(20B)_A100-GRPO.ipynb index c85563cf6..b73439965 100644 --- a/original_template/gpt-oss-(20B)_A100-GRPO.ipynb +++ b/original_template/gpt-oss-(20B)_A100-GRPO.ipynb @@ -1670,21 +1670,21 @@ "source": [ "# Merge to mxfp 4bit\n", "if False:\n", - " model.save_pretrained_merged(\"gpt-oss-finetune\", tokenizer, save_method = \"mxfp4\",)\n", + " 
model.save_pretrained_merged(\"gpt-oss-finetune-mxfp4\", tokenizer, save_method = \"mxfp4\",)\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"mxfp4\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-mxfp4\", tokenizer, save_method = \"mxfp4\", token = \"\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n", "\n", "# Just LoRA adapters\n", "if False:\n", - " model.save_pretrained(\"gpt-oss-finetune\")\n", - " tokenizer.save_pretrained(\"gpt-oss-finetune\")\n", + " model.save_pretrained(\"lora_model\")\n", + " tokenizer.save_pretrained(\"lora_model\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub(\"hf/gpt-oss-finetune\", token = \"\")\n", - " tokenizer.push_to_hub(\"hf/gpt-oss-finetune\", token = \"\")" + " model.push_to_hub(\"hf/lora_model\", token = \"\")\n", + " tokenizer.push_to_hub(\"hf/lora_model\", token = \"\")" ] }, { diff --git a/original_template/gpt_oss_(20B)_500K_Context_Fine_tuning.ipynb b/original_template/gpt_oss_(20B)_500K_Context_Fine_tuning.ipynb index 4ebfab096..1fc211896 100644 --- a/original_template/gpt_oss_(20B)_500K_Context_Fine_tuning.ipynb +++ b/original_template/gpt_oss_(20B)_500K_Context_Fine_tuning.ipynb @@ -856,14 +856,14 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", - "if False: model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", + "if False: 
model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] }, { diff --git a/original_template/gpt_oss_(20B)_GRPO_BF16.ipynb b/original_template/gpt_oss_(20B)_GRPO_BF16.ipynb index d701ae5c1..5444a4bd3 100644 --- a/original_template/gpt_oss_(20B)_GRPO_BF16.ipynb +++ b/original_template/gpt_oss_(20B)_GRPO_BF16.ipynb @@ -6194,14 +6194,14 @@ "source": [ "# Merge and push to hub in mxfp4 4bit format\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"mxfp4\")\n", - "if False: model.push_to_hub_merged(\"repo_id/repo_name\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", + " model.save_pretrained_merged(\"finetuned_model-mxfp4\", tokenizer, save_method = \"mxfp4\")\n", + "if False: model.push_to_hub_merged(\"repo_id/repo_name-mxfp4\", tokenizer, token = \"hf...\", save_method = \"mxfp4\")\n", "\n", "# Merge and push to hub in 16bit\n", "if False:\n", - " model.save_pretrained_merged(\"finetuned_model\", tokenizer, save_method = \"merged_16bit\")\n", + " model.save_pretrained_merged(\"finetuned_model-merged\", tokenizer, save_method = \"merged_16bit\")\n", "if False: # Pushing to HF Hub\n", - " model.push_to_hub_merged(\"hf/gpt-oss-finetune\", tokenizer, save_method = \"merged_16bit\", token = \"\")" + " model.push_to_hub_merged(\"hf/gpt-oss-finetune-merged\", tokenizer, save_method = \"merged_16bit\", token = \"\")" ] } ], diff --git 
a/python_scripts/Advanced_Llama3_1_(3B)_GRPO_LoRA.py b/python_scripts/Advanced_Llama3_1_(3B)_GRPO_LoRA.py index 680eb2561..217574e56 100644 --- a/python_scripts/Advanced_Llama3_1_(3B)_GRPO_LoRA.py +++ b/python_scripts/Advanced_Llama3_1_(3B)_GRPO_LoRA.py @@ -484,20 +484,20 @@ def check_numbers(prompts, completions, answer, **kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Advanced_Llama3_2_(3B)_GRPO_LoRA.py b/python_scripts/Advanced_Llama3_2_(3B)_GRPO_LoRA.py index 62c781e49..841998b6a 100644 --- a/python_scripts/Advanced_Llama3_2_(3B)_GRPO_LoRA.py +++ b/python_scripts/Advanced_Llama3_2_(3B)_GRPO_LoRA.py @@ -489,20 +489,20 @@ def check_numbers(prompts, completions, answer, **kwargs): # Merge to 16bit -if False: 
model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.py b/python_scripts/CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.py index 160edf303..07b94b58f 100644 --- a/python_scripts/CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.py +++ b/python_scripts/CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.py @@ -325,20 +325,20 @@ def formatting_prompts_func(example): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, 
save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/CodeGemma_(7B)-Conversational.py b/python_scripts/CodeGemma_(7B)-Conversational.py index c395330b2..efd3d9f5e 100644 --- a/python_scripts/CodeGemma_(7B)-Conversational.py +++ b/python_scripts/CodeGemma_(7B)-Conversational.py @@ -369,20 +369,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, 
save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py b/python_scripts/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py index 0ab454ed0..8afdf29c5 100644 --- a/python_scripts/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py +++ b/python_scripts/DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py @@ -754,20 +754,20 @@ def check_numbers(prompts, completions, answer, **kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + 
tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Deepseek_OCR_(3B)-Eval.py b/python_scripts/Deepseek_OCR_(3B)-Eval.py index cb388b8bc..974f2f699 100644 --- a/python_scripts/Deepseek_OCR_(3B)-Eval.py +++ b/python_scripts/Deepseek_OCR_(3B)-Eval.py @@ -730,10 +730,10 @@ def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, torch.Tensor]: # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/Deepseek_OCR_(3B)-Evaluation.py b/python_scripts/Deepseek_OCR_(3B)-Evaluation.py index d6f3d17a0..723fe4806 100644 --- a/python_scripts/Deepseek_OCR_(3B)-Evaluation.py +++ b/python_scripts/Deepseek_OCR_(3B)-Evaluation.py @@ -901,10 +901,10 @@ def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, torch.Tensor]: # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! 
If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/Deepseek_OCR_(3B).py b/python_scripts/Deepseek_OCR_(3B).py index cb388b8bc..974f2f699 100644 --- a/python_scripts/Deepseek_OCR_(3B).py +++ b/python_scripts/Deepseek_OCR_(3B).py @@ -730,10 +730,10 @@ def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, torch.Tensor]: # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/ERNIE_4_5_21B_A3B_PT-Conversational.py b/python_scripts/ERNIE_4_5_21B_A3B_PT-Conversational.py index 943c89894..bf8a02bdd 100644 --- a/python_scripts/ERNIE_4_5_21B_A3B_PT-Conversational.py +++ b/python_scripts/ERNIE_4_5_21B_A3B_PT-Conversational.py @@ -311,23 +311,23 @@ def formatting_prompts_func(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/ERNIE_4_5_VL_28B_A3B_PT_Vision.py b/python_scripts/ERNIE_4_5_VL_28B_A3B_PT_Vision.py index 0b5deba06..efb5c5918 100644 --- a/python_scripts/ERNIE_4_5_VL_28B_A3B_PT_Vision.py +++ b/python_scripts/ERNIE_4_5_VL_28B_A3B_PT_Vision.py @@ -602,10 +602,10 @@ def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=N # Select ONLY 1 to 
save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/Falcon_H1_(0.5B)-Alpaca.py b/python_scripts/Falcon_H1_(0.5B)-Alpaca.py index 1a3effe6c..b104da268 100644 --- a/python_scripts/Falcon_H1_(0.5B)-Alpaca.py +++ b/python_scripts/Falcon_H1_(0.5B)-Alpaca.py @@ -331,20 +331,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + 
tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Gemma2_(2B)-Alpaca.py b/python_scripts/Gemma2_(2B)-Alpaca.py index dc4cd8dd6..453907932 100644 --- a/python_scripts/Gemma2_(2B)-Alpaca.py +++ b/python_scripts/Gemma2_(2B)-Alpaca.py @@ -321,20 +321,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Gemma2_(9B)-Alpaca.py b/python_scripts/Gemma2_(9B)-Alpaca.py index 7a778cde0..1999b98ed 100644 --- a/python_scripts/Gemma2_(9B)-Alpaca.py +++ 
b/python_scripts/Gemma2_(9B)-Alpaca.py @@ -318,20 +318,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Gemma3N_(4B)-Audio.py b/python_scripts/Gemma3N_(4B)-Audio.py index 9485e945f..d7e837834 100644 --- a/python_scripts/Gemma3N_(4B)-Audio.py +++ b/python_scripts/Gemma3N_(4B)-Audio.py @@ -455,7 +455,7 @@ def collate_fn(examples): if True: # Change to True to save finetune! - model.save_pretrained_merged("gemma-3n", processor) + model.save_pretrained_merged("gemma-3n-merged", processor) # If you want to upload / push to your Hugging Face account, set `if False` to `if True` and add your Hugging Face token and upload location! 
diff --git a/python_scripts/Gemma3N_(4B)-Conversational.py b/python_scripts/Gemma3N_(4B)-Conversational.py index 2dfd6be47..924c41744 100644 --- a/python_scripts/Gemma3N_(4B)-Conversational.py +++ b/python_scripts/Gemma3N_(4B)-Conversational.py @@ -493,7 +493,7 @@ def formatting_prompts_func(examples): if False: # Change to True to save finetune! - model.save_pretrained_merged("gemma-3N-finetune", tokenizer) + model.save_pretrained_merged("gemma-3N-finetune-merged", tokenizer) # If you want to upload / push to your Hugging Face account, set `if False` to `if True` and add your Hugging Face token and upload location! diff --git a/python_scripts/Gemma3N_(4B)-Vision.py b/python_scripts/Gemma3N_(4B)-Vision.py index 2c18bec19..b3bb5bfa4 100644 --- a/python_scripts/Gemma3N_(4B)-Vision.py +++ b/python_scripts/Gemma3N_(4B)-Vision.py @@ -439,10 +439,10 @@ def convert_to_conversation(sample): # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", processor,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", processor,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", processor, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", processor, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/Gemma3_(1B)-GRPO.py b/python_scripts/Gemma3_(1B)-GRPO.py index fb81540fb..0b372f5fb 100644 --- a/python_scripts/Gemma3_(1B)-GRPO.py +++ b/python_scripts/Gemma3_(1B)-GRPO.py @@ -443,7 +443,7 @@ def check_numbers(prompts, completions, answer, **kwargs): if False: # Change to True to save finetune! 
- model.save_pretrained_merged("gemma-3-finetune", tokenizer) + model.save_pretrained_merged("gemma-3-finetune-merged", tokenizer) # If you want to upload / push to your Hugging Face account, set `if False` to `if True` and add your Hugging Face token and upload location! diff --git a/python_scripts/Gemma3_(270M).py b/python_scripts/Gemma3_(270M).py index d584838ac..a89d51523 100644 --- a/python_scripts/Gemma3_(270M).py +++ b/python_scripts/Gemma3_(270M).py @@ -332,23 +332,23 @@ def formatting_prompts_func(examples): # Merge to 16bit if False: - model.save_pretrained_merged("gemma-3-finetune", tokenizer, save_method = "merged_16bit") + model.save_pretrained_merged("gemma-3-finetune-merged", tokenizer, save_method = "merged_16bit") if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gemma-3-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gemma-3-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("gemma-3-finetune", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("gemma-3-finetune-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gemma-3-finetune", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/gemma-3-finetune-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("gemma-3-finetune") - tokenizer.save_pretrained("gemma-3-finetune") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/gemma-3-finetune", token = "") - tokenizer.push_to_hub("hf/gemma-3-finetune", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Gemma3_(27B)_A100-Conversational.py 
b/python_scripts/Gemma3_(27B)_A100-Conversational.py index 3008a3bc9..5f067897b 100644 --- a/python_scripts/Gemma3_(27B)_A100-Conversational.py +++ b/python_scripts/Gemma3_(27B)_A100-Conversational.py @@ -375,7 +375,7 @@ def formatting_prompts_func(examples): if False: # Change to True to save finetune! - model.save_pretrained_merged("gemma-3-finetune", tokenizer) + model.save_pretrained_merged("gemma-3-finetune-merged", tokenizer) # If you want to upload / push to your Hugging Face account, set `if False` to `if True` and add your Hugging Face token and upload location! diff --git a/python_scripts/Gemma3_(4B)-Vision-GRPO.py b/python_scripts/Gemma3_(4B)-Vision-GRPO.py index d2373a980..141989740 100644 --- a/python_scripts/Gemma3_(4B)-Vision-GRPO.py +++ b/python_scripts/Gemma3_(4B)-Vision-GRPO.py @@ -468,10 +468,10 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/Gemma3_(4B)-Vision.py b/python_scripts/Gemma3_(4B)-Vision.py index 53ebd82c4..bc039155d 100644 --- a/python_scripts/Gemma3_(4B)-Vision.py +++ b/python_scripts/Gemma3_(4B)-Vision.py @@ -432,10 +432,10 @@ def convert_to_conversation(sample): # Select ONLY 1 to save! (Both not needed!) 
# Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", processor,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", processor,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", processor, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", processor, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/Gemma3_(4B).py b/python_scripts/Gemma3_(4B).py index 48866367d..b3ee1a4cd 100644 --- a/python_scripts/Gemma3_(4B).py +++ b/python_scripts/Gemma3_(4B).py @@ -375,7 +375,7 @@ def formatting_prompts_func(examples): if False: # Change to True to save finetune! - model.save_pretrained_merged("gemma-3-finetune", tokenizer) + model.save_pretrained_merged("gemma-3-finetune-merged", tokenizer) # If you want to upload / push to your Hugging Face account, set `if False` to `if True` and add your Hugging Face token and upload location! 
diff --git a/python_scripts/Granite4.0.py b/python_scripts/Granite4.0.py index 98a67e5ca..de52caeb3 100644 --- a/python_scripts/Granite4.0.py +++ b/python_scripts/Granite4.0.py @@ -484,23 +484,23 @@ def formatting_prompts_func(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Granite4.0_350M.py b/python_scripts/Granite4.0_350M.py index 93e0de490..a44e3e05c 100644 --- a/python_scripts/Granite4.0_350M.py +++ b/python_scripts/Granite4.0_350M.py @@ -484,23 +484,23 @@ def formatting_prompts_func(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to 
HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py b/python_scripts/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py index 0851e39c3..42d609c2b 100644 --- a/python_scripts/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py +++ b/python_scripts/HuggingFace Course-Advanced_Llama3_1_(3B)_GRPO_LoRA.py @@ -488,20 +488,20 @@ def check_numbers(prompts, completions, answer, **kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = 
"merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py b/python_scripts/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py index 62c781e49..841998b6a 100644 --- a/python_scripts/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py +++ b/python_scripts/HuggingFace Course-Advanced_Llama3_2_(3B)_GRPO_LoRA.py @@ -489,20 +489,20 @@ def check_numbers(prompts, completions, answer, **kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token 
= "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py b/python_scripts/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py index 5497af1ed..10fd56da4 100644 --- a/python_scripts/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py +++ b/python_scripts/HuggingFace Course-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py @@ -756,20 +756,20 @@ def check_numbers(prompts, completions, answer, **kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + 
model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/HuggingFace Course-Gemma3_(1B)-GRPO.py b/python_scripts/HuggingFace Course-Gemma3_(1B)-GRPO.py index 485119cd1..b653c5097 100644 --- a/python_scripts/HuggingFace Course-Gemma3_(1B)-GRPO.py +++ b/python_scripts/HuggingFace Course-Gemma3_(1B)-GRPO.py @@ -445,7 +445,7 @@ def check_numbers(prompts, completions, answer, **kwargs): if False: # Change to True to save finetune! - model.save_pretrained_merged("gemma-3-finetune", tokenizer) + model.save_pretrained_merged("gemma-3-finetune-merged", tokenizer) # If you want to upload / push to your Hugging Face account, set `if False` to `if True` and add your Hugging Face token and upload location! diff --git a/python_scripts/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.py b/python_scripts/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.py index ea8db231e..067bd6456 100644 --- a/python_scripts/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.py +++ b/python_scripts/HuggingFace Course-Gemma3_(4B)-Vision-GRPO.py @@ -470,10 +470,10 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/HuggingFace Course-Llama3.1_(8B)-GRPO.py b/python_scripts/HuggingFace Course-Llama3.1_(8B)-GRPO.py index 83e2d757f..58991350c 100644 --- a/python_scripts/HuggingFace Course-Llama3.1_(8B)-GRPO.py +++ b/python_scripts/HuggingFace Course-Llama3.1_(8B)-GRPO.py @@ -328,20 +328,20 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/HuggingFace Course-Llama_FP8_GRPO.py b/python_scripts/HuggingFace Course-Llama_FP8_GRPO.py index fbfc80f39..48e325a72 100644 --- a/python_scripts/HuggingFace Course-Llama_FP8_GRPO.py +++ b/python_scripts/HuggingFace Course-Llama_FP8_GRPO.py @@ -816,20 +816,20 @@ def check_numbers(prompts, completions, answer, **kwargs): # Merge to 16bit -if 
False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.py b/python_scripts/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.py index 8f940283b..5b2c54567 100644 --- a/python_scripts/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.py +++ b/python_scripts/HuggingFace Course-Mistral_v0.3_(7B)-GRPO.py @@ -328,20 +328,20 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, 
save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/HuggingFace Course-Phi_4_(14B)-GRPO.py b/python_scripts/HuggingFace Course-Phi_4_(14B)-GRPO.py index d4fea3bc1..21ae41b51 100644 --- a/python_scripts/HuggingFace Course-Phi_4_(14B)-GRPO.py +++ b/python_scripts/HuggingFace Course-Phi_4_(14B)-GRPO.py @@ -331,20 +331,20 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: 
model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/HuggingFace Course-Qwen2.5_(3B)-GRPO.py b/python_scripts/HuggingFace Course-Qwen2.5_(3B)-GRPO.py index f40910d0f..029412852 100644 --- a/python_scripts/HuggingFace Course-Qwen2.5_(3B)-GRPO.py +++ b/python_scripts/HuggingFace Course-Qwen2.5_(3B)-GRPO.py @@ -327,20 +327,20 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token 
= "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/HuggingFace Course-Qwen2_5_7B_VL_GRPO.py b/python_scripts/HuggingFace Course-Qwen2_5_7B_VL_GRPO.py index 2a9469a9e..8e6d589c4 100644 --- a/python_scripts/HuggingFace Course-Qwen2_5_7B_VL_GRPO.py +++ b/python_scripts/HuggingFace Course-Qwen2_5_7B_VL_GRPO.py @@ -435,20 +435,20 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/HuggingFace Course-Qwen3_(4B)-GRPO.py b/python_scripts/HuggingFace Course-Qwen3_(4B)-GRPO.py index b0c76c265..c8df69b7e 100644 --- a/python_scripts/HuggingFace Course-Qwen3_(4B)-GRPO.py 
+++ b/python_scripts/HuggingFace Course-Qwen3_(4B)-GRPO.py @@ -743,20 +743,20 @@ def check_numbers(prompts, completions, answer, **kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/HuggingFace Course-Qwen3_8B_FP8_GRPO.py b/python_scripts/HuggingFace Course-Qwen3_8B_FP8_GRPO.py index cfb658d35..aa4143c9e 100644 --- a/python_scripts/HuggingFace Course-Qwen3_8B_FP8_GRPO.py +++ b/python_scripts/HuggingFace Course-Qwen3_8B_FP8_GRPO.py @@ -817,20 +817,20 @@ def check_numbers(prompts, completions, answer, **kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: 
model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.py b/python_scripts/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.py index 86f7c6ffd..bdd959bcf 100644 --- a/python_scripts/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.py +++ b/python_scripts/HuggingFace Course-Qwen3_VL_(8B)-Vision-GRPO.py @@ -449,20 +449,20 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: 
model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/HuggingFace Course-gpt-oss-(20B)-GRPO.py b/python_scripts/HuggingFace Course-gpt-oss-(20B)-GRPO.py index a67190a29..de0fe197c 100644 --- a/python_scripts/HuggingFace Course-gpt-oss-(20B)-GRPO.py +++ b/python_scripts/HuggingFace Course-gpt-oss-(20B)-GRPO.py @@ -785,14 +785,14 @@ def speed_check(completions, **kwargs): # Merge and push to hub in mxfp4 4bit format if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "mxfp4") -if False: model.push_to_hub_merged("repo_id/repo_name", tokenizer, token = "hf...", save_method = "mxfp4") + model.save_pretrained_merged("finetuned_model-mxfp4", tokenizer, save_method = "mxfp4") +if False: model.push_to_hub_merged("repo_id/repo_name-mxfp4", tokenizer, token = "hf...", save_method = "mxfp4") # Merge and push to hub in 16bit if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "merged_16bit") + model.save_pretrained_merged("finetuned_model-merged", tokenizer, save_method = "merged_16bit") if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # 
And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/HuggingFace Course-gpt-oss-(20B)_A100-GRPO.py b/python_scripts/HuggingFace Course-gpt-oss-(20B)_A100-GRPO.py index 3654a15d7..7d3a09098 100644 --- a/python_scripts/HuggingFace Course-gpt-oss-(20B)_A100-GRPO.py +++ b/python_scripts/HuggingFace Course-gpt-oss-(20B)_A100-GRPO.py @@ -341,21 +341,21 @@ def formatting_prompts_func(examples): # Merge to mxfp 4bit if False: - model.save_pretrained_merged("gpt-oss-finetune", tokenizer, save_method = "mxfp4",) + model.save_pretrained_merged("gpt-oss-finetune-mxfp4", tokenizer, save_method = "mxfp4",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "mxfp4", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-mxfp4", tokenizer, save_method = "mxfp4", token = "") # Merge and push to hub in 16bit if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("gpt-oss-finetune") - tokenizer.save_pretrained("gpt-oss-finetune") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/gpt-oss-finetune", token = "") - tokenizer.push_to_hub("hf/gpt-oss-finetune", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.py b/python_scripts/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.py index 395220962..644ec48c8 100644 --- 
a/python_scripts/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.py +++ b/python_scripts/HuggingFace Course-gpt_oss_(20B)_GRPO_BF16.py @@ -785,14 +785,14 @@ def speed_check(completions, **kwargs): # Merge and push to hub in mxfp4 4bit format if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "mxfp4") -if False: model.push_to_hub_merged("repo_id/repo_name", tokenizer, token = "hf...", save_method = "mxfp4") + model.save_pretrained_merged("finetuned_model-mxfp4", tokenizer, save_method = "mxfp4") +if False: model.push_to_hub_merged("repo_id/repo_name-mxfp4", tokenizer, token = "hf...", save_method = "mxfp4") # Merge and push to hub in 16bit if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "merged_16bit") + model.save_pretrained_merged("finetuned_model-merged", tokenizer, save_method = "merged_16bit") if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py b/python_scripts/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py index fd36e4e37..0cd708117 100644 --- a/python_scripts/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py +++ b/python_scripts/Kaggle-Advanced_Llama3_1_(3B)_GRPO_LoRA.py @@ -464,20 +464,20 @@ def check_numbers(prompts, completions, answer, **kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py b/python_scripts/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py index 7a4edf2a1..ee3805fce 100644 --- a/python_scripts/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py +++ b/python_scripts/Kaggle-Advanced_Llama3_2_(3B)_GRPO_LoRA.py @@ -467,20 +467,20 @@ def check_numbers(prompts, completions, answer, 
**kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.py b/python_scripts/Kaggle-CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.py index 7e8d9c9ec..cd096d8b2 100644 --- a/python_scripts/Kaggle-CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.py +++ b/python_scripts/Kaggle-CodeForces-cot-Finetune_for_Reasoning_on_CodeForces.py @@ -325,20 +325,20 @@ def formatting_prompts_func(example): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = 
"merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-CodeGemma_(7B)-Conversational.py b/python_scripts/Kaggle-CodeGemma_(7B)-Conversational.py index 3a3439799..ee45ee375 100644 --- a/python_scripts/Kaggle-CodeGemma_(7B)-Conversational.py +++ b/python_scripts/Kaggle-CodeGemma_(7B)-Conversational.py @@ -369,20 +369,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, 
save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py b/python_scripts/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py index 947ca3a2b..1de78316a 100644 --- a/python_scripts/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py +++ b/python_scripts/Kaggle-DeepSeek_R1_0528_Qwen3_(8B)_GRPO.py @@ -732,20 +732,20 @@ def check_numbers(prompts, completions, answer, **kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token 
= "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Deepseek_OCR_(3B)-Eval.py b/python_scripts/Kaggle-Deepseek_OCR_(3B)-Eval.py index 81c729c5b..49bcf9ff2 100644 --- a/python_scripts/Kaggle-Deepseek_OCR_(3B)-Eval.py +++ b/python_scripts/Kaggle-Deepseek_OCR_(3B)-Eval.py @@ -730,10 +730,10 @@ def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, torch.Tensor]: # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/Kaggle-Deepseek_OCR_(3B)-Evaluation.py b/python_scripts/Kaggle-Deepseek_OCR_(3B)-Evaluation.py index 601bd6dbe..ea8784ccb 100644 --- a/python_scripts/Kaggle-Deepseek_OCR_(3B)-Evaluation.py +++ b/python_scripts/Kaggle-Deepseek_OCR_(3B)-Evaluation.py @@ -901,10 +901,10 @@ def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, torch.Tensor]: # Select ONLY 1 to save! (Both not needed!) 
# Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/Kaggle-Deepseek_OCR_(3B).py b/python_scripts/Kaggle-Deepseek_OCR_(3B).py index 81c729c5b..49bcf9ff2 100644 --- a/python_scripts/Kaggle-Deepseek_OCR_(3B).py +++ b/python_scripts/Kaggle-Deepseek_OCR_(3B).py @@ -730,10 +730,10 @@ def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, torch.Tensor]: # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Kaggle-ERNIE_4_5_21B_A3B_PT-Conversational.py b/python_scripts/Kaggle-ERNIE_4_5_21B_A3B_PT-Conversational.py index c8ba14187..f884ab040 100644 --- a/python_scripts/Kaggle-ERNIE_4_5_21B_A3B_PT-Conversational.py +++ b/python_scripts/Kaggle-ERNIE_4_5_21B_A3B_PT-Conversational.py @@ -311,23 +311,23 @@ def formatting_prompts_func(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-ERNIE_4_5_VL_28B_A3B_PT_Vision.py b/python_scripts/Kaggle-ERNIE_4_5_VL_28B_A3B_PT_Vision.py index 507c8c70d..09cc934ae 100644 --- a/python_scripts/Kaggle-ERNIE_4_5_VL_28B_A3B_PT_Vision.py +++ b/python_scripts/Kaggle-ERNIE_4_5_VL_28B_A3B_PT_Vision.py @@ -602,10 +602,10 @@ def compute_loss(self, model, inputs, 
return_outputs=False, num_items_in_batch=N # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/Kaggle-Falcon_H1_(0.5B)-Alpaca.py b/python_scripts/Kaggle-Falcon_H1_(0.5B)-Alpaca.py index 0bcf94009..a399c7be3 100644 --- a/python_scripts/Kaggle-Falcon_H1_(0.5B)-Alpaca.py +++ b/python_scripts/Kaggle-Falcon_H1_(0.5B)-Alpaca.py @@ -331,20 +331,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - 
tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Gemma2_(2B)-Alpaca.py b/python_scripts/Kaggle-Gemma2_(2B)-Alpaca.py index e45b6121b..abee0e8d9 100644 --- a/python_scripts/Kaggle-Gemma2_(2B)-Alpaca.py +++ b/python_scripts/Kaggle-Gemma2_(2B)-Alpaca.py @@ -321,20 +321,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Gemma2_(9B)-Alpaca.py 
b/python_scripts/Kaggle-Gemma2_(9B)-Alpaca.py index 3cb90697f..ee6287a90 100644 --- a/python_scripts/Kaggle-Gemma2_(9B)-Alpaca.py +++ b/python_scripts/Kaggle-Gemma2_(9B)-Alpaca.py @@ -318,20 +318,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Gemma3N_(4B)-Audio.py b/python_scripts/Kaggle-Gemma3N_(4B)-Audio.py index 2169a91d4..b2bd62fa6 100644 --- a/python_scripts/Kaggle-Gemma3N_(4B)-Audio.py +++ b/python_scripts/Kaggle-Gemma3N_(4B)-Audio.py @@ -455,7 +455,7 @@ def collate_fn(examples): if True: # Change to True to save finetune! 
- model.save_pretrained_merged("gemma-3n", processor) + model.save_pretrained_merged("gemma-3n-merged", processor) # If you want to upload / push to your Hugging Face account, set `if False` to `if True` and add your Hugging Face token and upload location! diff --git a/python_scripts/Kaggle-Gemma3N_(4B)-Conversational.py b/python_scripts/Kaggle-Gemma3N_(4B)-Conversational.py index d31e62ea6..64f364dc2 100644 --- a/python_scripts/Kaggle-Gemma3N_(4B)-Conversational.py +++ b/python_scripts/Kaggle-Gemma3N_(4B)-Conversational.py @@ -493,7 +493,7 @@ def formatting_prompts_func(examples): if False: # Change to True to save finetune! - model.save_pretrained_merged("gemma-3N-finetune", tokenizer) + model.save_pretrained_merged("gemma-3N-finetune-merged", tokenizer) # If you want to upload / push to your Hugging Face account, set `if False` to `if True` and add your Hugging Face token and upload location! diff --git a/python_scripts/Kaggle-Gemma3N_(4B)-Vision.py b/python_scripts/Kaggle-Gemma3N_(4B)-Vision.py index dffd7727a..be938601a 100644 --- a/python_scripts/Kaggle-Gemma3N_(4B)-Vision.py +++ b/python_scripts/Kaggle-Gemma3N_(4B)-Vision.py @@ -439,10 +439,10 @@ def convert_to_conversation(sample): # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", processor,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", processor,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", processor, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", processor, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Kaggle-Gemma3_(1B)-GRPO.py b/python_scripts/Kaggle-Gemma3_(1B)-GRPO.py index 80755482d..aa71df9c0 100644 --- a/python_scripts/Kaggle-Gemma3_(1B)-GRPO.py +++ b/python_scripts/Kaggle-Gemma3_(1B)-GRPO.py @@ -421,7 +421,7 @@ def check_numbers(prompts, completions, answer, **kwargs): if False: # Change to True to save finetune! - model.save_pretrained_merged("gemma-3-finetune", tokenizer) + model.save_pretrained_merged("gemma-3-finetune-merged", tokenizer) # If you want to upload / push to your Hugging Face account, set `if False` to `if True` and add your Hugging Face token and upload location! diff --git a/python_scripts/Kaggle-Gemma3_(270M).py b/python_scripts/Kaggle-Gemma3_(270M).py index 847455677..1e751017c 100644 --- a/python_scripts/Kaggle-Gemma3_(270M).py +++ b/python_scripts/Kaggle-Gemma3_(270M).py @@ -332,23 +332,23 @@ def formatting_prompts_func(examples): # Merge to 16bit if False: - model.save_pretrained_merged("gemma-3-finetune", tokenizer, save_method = "merged_16bit") + model.save_pretrained_merged("gemma-3-finetune-merged", tokenizer, save_method = "merged_16bit") if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gemma-3-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gemma-3-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("gemma-3-finetune", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("gemma-3-finetune-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gemma-3-finetune", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/gemma-3-finetune-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("gemma-3-finetune") - tokenizer.save_pretrained("gemma-3-finetune") + model.save_pretrained("lora_model") + 
tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/gemma-3-finetune", token = "") - tokenizer.push_to_hub("hf/gemma-3-finetune", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Gemma3_(27B)_A100-Conversational.py b/python_scripts/Kaggle-Gemma3_(27B)_A100-Conversational.py index be258f9ea..7466f85f2 100644 --- a/python_scripts/Kaggle-Gemma3_(27B)_A100-Conversational.py +++ b/python_scripts/Kaggle-Gemma3_(27B)_A100-Conversational.py @@ -375,7 +375,7 @@ def formatting_prompts_func(examples): if False: # Change to True to save finetune! - model.save_pretrained_merged("gemma-3-finetune", tokenizer) + model.save_pretrained_merged("gemma-3-finetune-merged", tokenizer) # If you want to upload / push to your Hugging Face account, set `if False` to `if True` and add your Hugging Face token and upload location! diff --git a/python_scripts/Kaggle-Gemma3_(4B)-Vision-GRPO.py b/python_scripts/Kaggle-Gemma3_(4B)-Vision-GRPO.py index 6739bf632..7a2be5015 100644 --- a/python_scripts/Kaggle-Gemma3_(4B)-Vision-GRPO.py +++ b/python_scripts/Kaggle-Gemma3_(4B)-Vision-GRPO.py @@ -446,10 +446,10 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! 
If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/Kaggle-Gemma3_(4B)-Vision.py b/python_scripts/Kaggle-Gemma3_(4B)-Vision.py index 1fc459713..0f34f9a05 100644 --- a/python_scripts/Kaggle-Gemma3_(4B)-Vision.py +++ b/python_scripts/Kaggle-Gemma3_(4B)-Vision.py @@ -432,10 +432,10 @@ def convert_to_conversation(sample): # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", processor,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", processor,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", processor, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", processor, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/Kaggle-Gemma3_(4B).py b/python_scripts/Kaggle-Gemma3_(4B).py index e84121f71..95fc4bce1 100644 --- a/python_scripts/Kaggle-Gemma3_(4B).py +++ b/python_scripts/Kaggle-Gemma3_(4B).py @@ -375,7 +375,7 @@ def formatting_prompts_func(examples): if False: # Change to True to save finetune! - model.save_pretrained_merged("gemma-3-finetune", tokenizer) + model.save_pretrained_merged("gemma-3-finetune-merged", tokenizer) # If you want to upload / push to your Hugging Face account, set `if False` to `if True` and add your Hugging Face token and upload location! 
diff --git a/python_scripts/Kaggle-Granite4.0.py b/python_scripts/Kaggle-Granite4.0.py index 98a67e5ca..de52caeb3 100644 --- a/python_scripts/Kaggle-Granite4.0.py +++ b/python_scripts/Kaggle-Granite4.0.py @@ -484,23 +484,23 @@ def formatting_prompts_func(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Granite4.0_350M.py b/python_scripts/Kaggle-Granite4.0_350M.py index 7a7676dcc..471ffa19f 100644 --- a/python_scripts/Kaggle-Granite4.0_350M.py +++ b/python_scripts/Kaggle-Granite4.0_350M.py @@ -490,23 +490,23 @@ def formatting_prompts_func(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", 
tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Liquid_LFM2_(1.2B)-Conversational.py b/python_scripts/Kaggle-Liquid_LFM2_(1.2B)-Conversational.py index 0db2e9456..01c1ae70f 100644 --- a/python_scripts/Kaggle-Liquid_LFM2_(1.2B)-Conversational.py +++ b/python_scripts/Kaggle-Liquid_LFM2_(1.2B)-Conversational.py @@ -388,20 +388,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = 
"merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Llama3.1_(8B)-Alpaca.py b/python_scripts/Kaggle-Llama3.1_(8B)-Alpaca.py index 422bd7772..b24154129 100644 --- a/python_scripts/Kaggle-Llama3.1_(8B)-Alpaca.py +++ b/python_scripts/Kaggle-Llama3.1_(8B)-Alpaca.py @@ -318,20 +318,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + 
model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py b/python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py index f140c97de..f26a7f5c8 100644 --- a/python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py +++ b/python_scripts/Kaggle-Llama3.1_(8B)-GRPO.py @@ -304,20 +304,20 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Llama3.2_(11B)-Vision.py 
b/python_scripts/Kaggle-Llama3.2_(11B)-Vision.py index 607bbdb83..7bed23586 100644 --- a/python_scripts/Kaggle-Llama3.2_(11B)-Vision.py +++ b/python_scripts/Kaggle-Llama3.2_(11B)-Vision.py @@ -383,10 +383,10 @@ def convert_to_conversation(sample): # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/Kaggle-Llama3.2_(1B)-RAFT.py b/python_scripts/Kaggle-Llama3.2_(1B)-RAFT.py index eb7efc210..b821943ef 100644 --- a/python_scripts/Kaggle-Llama3.2_(1B)-RAFT.py +++ b/python_scripts/Kaggle-Llama3.2_(1B)-RAFT.py @@ -337,20 +337,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if 
False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Llama3.2_(1B_and_3B)-Conversational.py b/python_scripts/Kaggle-Llama3.2_(1B_and_3B)-Conversational.py index 71f623254..d8f82c515 100644 --- a/python_scripts/Kaggle-Llama3.2_(1B_and_3B)-Conversational.py +++ b/python_scripts/Kaggle-Llama3.2_(1B_and_3B)-Conversational.py @@ -398,20 +398,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", 
token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Llama3.3_(70B)_A100-Conversational.py b/python_scripts/Kaggle-Llama3.3_(70B)_A100-Conversational.py index 0e1d546d8..54c1a13c1 100644 --- a/python_scripts/Kaggle-Llama3.3_(70B)_A100-Conversational.py +++ b/python_scripts/Kaggle-Llama3.3_(70B)_A100-Conversational.py @@ -398,20 +398,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Llama3_(8B)-Alpaca.py b/python_scripts/Kaggle-Llama3_(8B)-Alpaca.py index ac543b529..3b75c08d1 100644 --- a/python_scripts/Kaggle-Llama3_(8B)-Alpaca.py +++ 
b/python_scripts/Kaggle-Llama3_(8B)-Alpaca.py @@ -311,20 +311,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Llama3_(8B)-Conversational.py b/python_scripts/Kaggle-Llama3_(8B)-Conversational.py index 24100d436..ed4d9a86a 100644 --- a/python_scripts/Kaggle-Llama3_(8B)-Conversational.py +++ b/python_scripts/Kaggle-Llama3_(8B)-Conversational.py @@ -362,20 +362,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) 
+if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Llama3_(8B)-ORPO.py b/python_scripts/Kaggle-Llama3_(8B)-ORPO.py index 6f369c716..fd4c706a7 100644 --- a/python_scripts/Kaggle-Llama3_(8B)-ORPO.py +++ b/python_scripts/Kaggle-Llama3_(8B)-ORPO.py @@ -318,20 +318,20 @@ def format_prompt(sample): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: 
model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Llama_FP8_GRPO.py b/python_scripts/Kaggle-Llama_FP8_GRPO.py index c4d29ec9e..0c5a0a227 100644 --- a/python_scripts/Kaggle-Llama_FP8_GRPO.py +++ b/python_scripts/Kaggle-Llama_FP8_GRPO.py @@ -792,20 +792,20 @@ def check_numbers(prompts, completions, answer, **kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + 
tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Llasa_TTS_(1B).py b/python_scripts/Kaggle-Llasa_TTS_(1B).py index 7aace11b5..a4e794913 100644 --- a/python_scripts/Kaggle-Llasa_TTS_(1B).py +++ b/python_scripts/Kaggle-Llasa_TTS_(1B).py @@ -561,20 +561,20 @@ def extract_speech_ids(speech_tokens_str): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Kaggle-Llasa_TTS_(3B).py b/python_scripts/Kaggle-Llasa_TTS_(3B).py index 59ab835ae..f8bc40945 100644 --- a/python_scripts/Kaggle-Llasa_TTS_(3B).py +++ b/python_scripts/Kaggle-Llasa_TTS_(3B).py @@ -561,20 +561,20 @@ def extract_speech_ids(speech_tokens_str): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Kaggle-Magistral_(24B)-Reasoning-Conversational.py b/python_scripts/Kaggle-Magistral_(24B)-Reasoning-Conversational.py index 647a1324d..54ce219b0 100644 --- a/python_scripts/Kaggle-Magistral_(24B)-Reasoning-Conversational.py +++ b/python_scripts/Kaggle-Magistral_(24B)-Reasoning-Conversational.py @@ -297,23 +297,23 @@ def generate_conversation(example): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Meta-Synthetic-Data-Llama3.1_(8B).py b/python_scripts/Kaggle-Meta-Synthetic-Data-Llama3.1_(8B).py index 711664032..46129916d 100644 --- a/python_scripts/Kaggle-Meta-Synthetic-Data-Llama3.1_(8B).py +++ b/python_scripts/Kaggle-Meta-Synthetic-Data-Llama3.1_(8B).py @@ -620,20 +620,20 @@ def 
formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Meta_Synthetic_Data_Llama3_2_(3B).py b/python_scripts/Kaggle-Meta_Synthetic_Data_Llama3_2_(3B).py index ad0102230..3c8d02f08 100644 --- a/python_scripts/Kaggle-Meta_Synthetic_Data_Llama3_2_(3B).py +++ b/python_scripts/Kaggle-Meta_Synthetic_Data_Llama3_2_(3B).py @@ -430,23 +430,23 @@ def formatting_prompts_func(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Change to True to upload finetune - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + 
model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Change to True to upload finetune - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Change to True to upload finetune - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.py b/python_scripts/Kaggle-Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.py index 4fa9850f3..1483c4370 100644 --- a/python_scripts/Kaggle-Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.py +++ b/python_scripts/Kaggle-Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.py @@ -830,20 +830,20 @@ def strategy_succeeds(completions, **kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", 
tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Ministral_3_VL_(3B)_Vision.py b/python_scripts/Kaggle-Ministral_3_VL_(3B)_Vision.py index 68e650a39..32024f79e 100644 --- a/python_scripts/Kaggle-Ministral_3_VL_(3B)_Vision.py +++ b/python_scripts/Kaggle-Ministral_3_VL_(3B)_Vision.py @@ -387,10 +387,10 @@ def convert_to_conversation(sample): # Select ONLY 1 to save! (Both not needed!) 
# Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Mistral_Nemo_(12B)-Alpaca.py b/python_scripts/Kaggle-Mistral_Nemo_(12B)-Alpaca.py index 44bcb6ef4..b91fa2991 100644 --- a/python_scripts/Kaggle-Mistral_Nemo_(12B)-Alpaca.py +++ b/python_scripts/Kaggle-Mistral_Nemo_(12B)-Alpaca.py @@ -318,20 +318,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + 
tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Mistral_Small_(22B)-Alpaca.py b/python_scripts/Kaggle-Mistral_Small_(22B)-Alpaca.py index b609e42d4..f522a6fc2 100644 --- a/python_scripts/Kaggle-Mistral_Small_(22B)-Alpaca.py +++ b/python_scripts/Kaggle-Mistral_Small_(22B)-Alpaca.py @@ -318,20 +318,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Mistral_v0.3_(7B)-Alpaca.py b/python_scripts/Kaggle-Mistral_v0.3_(7B)-Alpaca.py index 50c62eb0c..eac3e19bd 100644 --- a/python_scripts/Kaggle-Mistral_v0.3_(7B)-Alpaca.py +++ b/python_scripts/Kaggle-Mistral_v0.3_(7B)-Alpaca.py @@ -311,20 +311,20 @@ def formatting_prompts_func(examples): # Merge 
to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Mistral_v0.3_(7B)-CPT.py b/python_scripts/Kaggle-Mistral_v0.3_(7B)-CPT.py index fdaed83bd..c13d5dc36 100644 --- a/python_scripts/Kaggle-Mistral_v0.3_(7B)-CPT.py +++ b/python_scripts/Kaggle-Mistral_v0.3_(7B)-CPT.py @@ -484,20 +484,20 @@ def formatting_prompts_func(conversations): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if 
False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Mistral_v0.3_(7B)-Conversational.py b/python_scripts/Kaggle-Mistral_v0.3_(7B)-Conversational.py index 2ed6b531d..35bf9b1fe 100644 --- a/python_scripts/Kaggle-Mistral_v0.3_(7B)-Conversational.py +++ b/python_scripts/Kaggle-Mistral_v0.3_(7B)-Conversational.py @@ -369,20 +369,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", 
token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Mistral_v0.3_(7B)-GRPO.py b/python_scripts/Kaggle-Mistral_v0.3_(7B)-GRPO.py index 1119c6911..9e2b604aa 100644 --- a/python_scripts/Kaggle-Mistral_v0.3_(7B)-GRPO.py +++ b/python_scripts/Kaggle-Mistral_v0.3_(7B)-GRPO.py @@ -304,20 +304,20 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### 
GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Nemotron-3-Nano-30B-A3B_A100.py b/python_scripts/Kaggle-Nemotron-3-Nano-30B-A3B_A100.py index 527517401..fa3e96268 100644 --- a/python_scripts/Kaggle-Nemotron-3-Nano-30B-A3B_A100.py +++ b/python_scripts/Kaggle-Nemotron-3-Nano-30B-A3B_A100.py @@ -314,23 +314,23 @@ def formatting_prompts_func(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Nemotron-Nano-3-30B-A3B_A100.py b/python_scripts/Kaggle-Nemotron-Nano-3-30B-A3B_A100.py index 527517401..fa3e96268 100644 --- a/python_scripts/Kaggle-Nemotron-Nano-3-30B-A3B_A100.py +++ b/python_scripts/Kaggle-Nemotron-Nano-3-30B-A3B_A100.py @@ -314,23 +314,23 @@ def formatting_prompts_func(examples): # Merge to 
16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Orpheus_(3B)-TTS.py b/python_scripts/Kaggle-Orpheus_(3B)-TTS.py index 64ce119d8..c0e3bc234 100644 --- a/python_scripts/Kaggle-Orpheus_(3B)-TTS.py +++ b/python_scripts/Kaggle-Orpheus_(3B)-TTS.py @@ -471,20 +471,20 @@ def redistribute_codes(code_list): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: 
model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Kaggle-Oute_TTS_(1B).py b/python_scripts/Kaggle-Oute_TTS_(1B).py index 17b04ca95..ae029f8f8 100644 --- a/python_scripts/Kaggle-Oute_TTS_(1B).py +++ b/python_scripts/Kaggle-Oute_TTS_(1B).py @@ -527,20 +527,20 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> to # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Kaggle-Paddle_OCR_(1B)_Vision.py b/python_scripts/Kaggle-Paddle_OCR_(1B)_Vision.py index 8fc9094c4..ae06ff11e 100644 --- a/python_scripts/Kaggle-Paddle_OCR_(1B)_Vision.py +++ b/python_scripts/Kaggle-Paddle_OCR_(1B)_Vision.py @@ -401,10 +401,10 @@ def convert_to_conversation(sample): # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Kaggle-Phi_3.5_Mini-Conversational.py b/python_scripts/Kaggle-Phi_3.5_Mini-Conversational.py index dedacb723..3295156e3 100644 --- a/python_scripts/Kaggle-Phi_3.5_Mini-Conversational.py +++ b/python_scripts/Kaggle-Phi_3.5_Mini-Conversational.py @@ -370,20 +370,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Phi_3_Medium-Conversational.py b/python_scripts/Kaggle-Phi_3_Medium-Conversational.py index 9465a6406..435482a51 100644 --- a/python_scripts/Kaggle-Phi_3_Medium-Conversational.py +++ b/python_scripts/Kaggle-Phi_3_Medium-Conversational.py @@ -365,20 +365,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, 
save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Phi_4-Conversational.py b/python_scripts/Kaggle-Phi_4-Conversational.py index 0077b012d..495f9164f 100644 --- a/python_scripts/Kaggle-Phi_4-Conversational.py +++ b/python_scripts/Kaggle-Phi_4-Conversational.py @@ -399,20 +399,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = 
"merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Phi_4_(14B)-GRPO.py b/python_scripts/Kaggle-Phi_4_(14B)-GRPO.py index 3a80c1aba..1b8bbf37f 100644 --- a/python_scripts/Kaggle-Phi_4_(14B)-GRPO.py +++ b/python_scripts/Kaggle-Phi_4_(14B)-GRPO.py @@ -307,20 +307,20 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") 
+ model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Pixtral_(12B)-Vision.py b/python_scripts/Kaggle-Pixtral_(12B)-Vision.py index c39d3cb20..5ee754473 100644 --- a/python_scripts/Kaggle-Pixtral_(12B)-Vision.py +++ b/python_scripts/Kaggle-Pixtral_(12B)-Vision.py @@ -313,10 +313,10 @@ # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py b/python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py index 873939682..ffc0c2ca7 100644 --- a/python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py +++ b/python_scripts/Kaggle-Qwen2.5_(3B)-GRPO.py @@ -303,20 +303,20 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Qwen2.5_(7B)-Alpaca.py b/python_scripts/Kaggle-Qwen2.5_(7B)-Alpaca.py index 35c066b4c..5b3769bfe 100644 --- a/python_scripts/Kaggle-Qwen2.5_(7B)-Alpaca.py +++ b/python_scripts/Kaggle-Qwen2.5_(7B)-Alpaca.py @@ -322,20 +322,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: 
model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Qwen2.5_Coder_(14B)-Conversational.py b/python_scripts/Kaggle-Qwen2.5_Coder_(14B)-Conversational.py index c7ec53ce5..c0b0247f8 100644 --- a/python_scripts/Kaggle-Qwen2.5_Coder_(14B)-Conversational.py +++ b/python_scripts/Kaggle-Qwen2.5_Coder_(14B)-Conversational.py @@ -403,20 +403,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method 
= "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Qwen2.5_VL_(7B)-Vision.py b/python_scripts/Kaggle-Qwen2.5_VL_(7B)-Vision.py index 2bbc8a668..6170081aa 100644 --- a/python_scripts/Kaggle-Qwen2.5_VL_(7B)-Vision.py +++ b/python_scripts/Kaggle-Qwen2.5_VL_(7B)-Vision.py @@ -390,10 +390,10 @@ def convert_to_conversation(sample): # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Kaggle-Qwen2_(7B)-Alpaca.py b/python_scripts/Kaggle-Qwen2_(7B)-Alpaca.py index a60db1f06..cb77c770d 100644 --- a/python_scripts/Kaggle-Qwen2_(7B)-Alpaca.py +++ b/python_scripts/Kaggle-Qwen2_(7B)-Alpaca.py @@ -319,20 +319,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py b/python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py index 2f25cca38..773d6f9eb 100644 --- a/python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py +++ b/python_scripts/Kaggle-Qwen2_5_7B_VL_GRPO.py @@ -411,20 +411,20 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if 
False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Qwen2_VL_(7B)-Vision.py b/python_scripts/Kaggle-Qwen2_VL_(7B)-Vision.py index 4549a329f..18fd5f3c8 100644 --- a/python_scripts/Kaggle-Qwen2_VL_(7B)-Vision.py +++ b/python_scripts/Kaggle-Qwen2_VL_(7B)-Vision.py @@ -390,10 +390,10 @@ def convert_to_conversation(sample): # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! 
If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/Kaggle-Qwen3_(14B)-Alpaca.py b/python_scripts/Kaggle-Qwen3_(14B)-Alpaca.py index dcaf72414..eb6d2aa29 100644 --- a/python_scripts/Kaggle-Qwen3_(14B)-Alpaca.py +++ b/python_scripts/Kaggle-Qwen3_(14B)-Alpaca.py @@ -320,20 +320,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Qwen3_(14B)-Reasoning-Conversational.py b/python_scripts/Kaggle-Qwen3_(14B)-Reasoning-Conversational.py index a5c6f4dc3..a3c053e4f 100644 --- 
a/python_scripts/Kaggle-Qwen3_(14B)-Reasoning-Conversational.py +++ b/python_scripts/Kaggle-Qwen3_(14B)-Reasoning-Conversational.py @@ -386,23 +386,23 @@ def generate_conversation(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Qwen3_(14B).py b/python_scripts/Kaggle-Qwen3_(14B).py index b6473d754..bcb4788f6 100644 --- a/python_scripts/Kaggle-Qwen3_(14B).py +++ b/python_scripts/Kaggle-Qwen3_(14B).py @@ -383,23 +383,23 @@ def generate_conversation(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - 
model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Qwen3_(32B)_A100-Reasoning-Conversational.py b/python_scripts/Kaggle-Qwen3_(32B)_A100-Reasoning-Conversational.py index bd1c5b42c..fc5e90c43 100644 --- a/python_scripts/Kaggle-Qwen3_(32B)_A100-Reasoning-Conversational.py +++ b/python_scripts/Kaggle-Qwen3_(32B)_A100-Reasoning-Conversational.py @@ -386,23 +386,23 @@ def generate_conversation(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + 
model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Qwen3_(4B)-GRPO.py b/python_scripts/Kaggle-Qwen3_(4B)-GRPO.py index ec193bf3d..6f0abb613 100644 --- a/python_scripts/Kaggle-Qwen3_(4B)-GRPO.py +++ b/python_scripts/Kaggle-Qwen3_(4B)-GRPO.py @@ -719,20 +719,20 @@ def check_numbers(prompts, completions, answer, **kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + 
model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Qwen3_(4B)-Instruct.py b/python_scripts/Kaggle-Qwen3_(4B)-Instruct.py index 5f7007af2..6a8b923e2 100644 --- a/python_scripts/Kaggle-Qwen3_(4B)-Instruct.py +++ b/python_scripts/Kaggle-Qwen3_(4B)-Instruct.py @@ -321,23 +321,23 @@ def formatting_prompts_func(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Qwen3_(4B)-Thinking.py 
b/python_scripts/Kaggle-Qwen3_(4B)-Thinking.py index 79d63195f..68126a211 100644 --- a/python_scripts/Kaggle-Qwen3_(4B)-Thinking.py +++ b/python_scripts/Kaggle-Qwen3_(4B)-Thinking.py @@ -324,23 +324,23 @@ def formatting_prompts_func(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Qwen3_8B_FP8_GRPO.py b/python_scripts/Kaggle-Qwen3_8B_FP8_GRPO.py index d07c18b4e..b9fdef166 100644 --- a/python_scripts/Kaggle-Qwen3_8B_FP8_GRPO.py +++ b/python_scripts/Kaggle-Qwen3_8B_FP8_GRPO.py @@ -793,20 +793,20 @@ def check_numbers(prompts, completions, answer, **kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", 
tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py b/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py index c641b722b..d236774b3 100644 --- a/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py +++ b/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision-GRPO.py @@ -425,20 +425,20 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: 
model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision.py b/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision.py index 1aa0c90fb..3b369101b 100644 --- a/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision.py +++ b/python_scripts/Kaggle-Qwen3_VL_(8B)-Vision.py @@ -390,10 +390,10 @@ def convert_to_conversation(sample): # Select ONLY 1 to save! (Both not needed!) 
# Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Sesame_CSM_(1B)-TTS.py b/python_scripts/Kaggle-Sesame_CSM_(1B)-TTS.py index 5ffe22a18..9dd3c45e8 100644 --- a/python_scripts/Kaggle-Sesame_CSM_(1B)-TTS.py +++ b/python_scripts/Kaggle-Sesame_CSM_(1B)-TTS.py @@ -367,19 +367,19 @@ def preprocess_example(example): # Merge to 16bit -if False: model.save_pretrained_merged("model", processor, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", processor, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", processor, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", processor, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", processor, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", processor, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", processor, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", processor, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") + model.save_pretrained("lora_model") processor.save_pretrained("model") if False: - model.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") processor.push_to_hub("hf/model", token = "") diff --git a/python_scripts/Kaggle-Spark_TTS_(0_5B).py b/python_scripts/Kaggle-Spark_TTS_(0_5B).py index 
b7f146107..b7786f4f9 100644 --- a/python_scripts/Kaggle-Spark_TTS_(0_5B).py +++ b/python_scripts/Kaggle-Spark_TTS_(0_5B).py @@ -460,20 +460,20 @@ def generate_speech_from_text( # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Kaggle-TinyLlama_(1.1B)-Alpaca.py b/python_scripts/Kaggle-TinyLlama_(1.1B)-Alpaca.py index 6efc7c225..f4aea0889 100644 --- a/python_scripts/Kaggle-TinyLlama_(1.1B)-Alpaca.py +++ b/python_scripts/Kaggle-TinyLlama_(1.1B)-Alpaca.py @@ -316,20 +316,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-Whisper.py b/python_scripts/Kaggle-Whisper.py index 62168b991..d021e90a9 100644 --- a/python_scripts/Kaggle-Whisper.py +++ b/python_scripts/Kaggle-Whisper.py @@ -317,20 +317,20 @@ def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = None,) -if False: 
model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = None,) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Kaggle-gpt-oss-(120B)_A100-Fine-tuning.py b/python_scripts/Kaggle-gpt-oss-(120B)_A100-Fine-tuning.py index ab94e7a4c..d9244d25f 100644 --- a/python_scripts/Kaggle-gpt-oss-(120B)_A100-Fine-tuning.py +++ b/python_scripts/Kaggle-gpt-oss-(120B)_A100-Fine-tuning.py @@ -339,21 +339,21 @@ def formatting_prompts_func(examples): # Merge to mxfp 4bit if False: - model.save_pretrained_merged("gpt-oss-finetune", tokenizer, save_method = "mxfp4",) + model.save_pretrained_merged("gpt-oss-finetune-mxfp4", tokenizer, save_method = "mxfp4",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "mxfp4", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-mxfp4", tokenizer, save_method = "mxfp4", token = "") # Merge and push to hub in 16bit if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("gpt-oss-finetune") - tokenizer.save_pretrained("gpt-oss-finetune") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/gpt-oss-finetune", token = "") - tokenizer.push_to_hub("hf/gpt-oss-finetune", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-gpt-oss-(20B)-Fine-tuning.py b/python_scripts/Kaggle-gpt-oss-(20B)-Fine-tuning.py index e292d4aff..8a83aae8b 100644 --- a/python_scripts/Kaggle-gpt-oss-(20B)-Fine-tuning.py +++ b/python_scripts/Kaggle-gpt-oss-(20B)-Fine-tuning.py @@ -368,14 +368,14 @@ def formatting_prompts_func(examples): # Merge and push to hub in mxfp4 4bit format if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method 
= "mxfp4") -if False: model.push_to_hub_merged("repo_id/repo_name", tokenizer, token = "hf...", save_method = "mxfp4") + model.save_pretrained_merged("finetuned_model-mxfp4", tokenizer, save_method = "mxfp4") +if False: model.push_to_hub_merged("repo_id/repo_name-mxfp4", tokenizer, token = "hf...", save_method = "mxfp4") # Merge and push to hub in 16bit if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "merged_16bit") + model.save_pretrained_merged("finetuned_model-merged", tokenizer, save_method = "merged_16bit") if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Kaggle-gpt-oss-(20B)-GRPO.py b/python_scripts/Kaggle-gpt-oss-(20B)-GRPO.py index a67190a29..de0fe197c 100644 --- a/python_scripts/Kaggle-gpt-oss-(20B)-GRPO.py +++ b/python_scripts/Kaggle-gpt-oss-(20B)-GRPO.py @@ -785,14 +785,14 @@ def speed_check(completions, **kwargs): # Merge and push to hub in mxfp4 4bit format if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "mxfp4") -if False: model.push_to_hub_merged("repo_id/repo_name", tokenizer, token = "hf...", save_method = "mxfp4") + model.save_pretrained_merged("finetuned_model-mxfp4", tokenizer, save_method = "mxfp4") +if False: model.push_to_hub_merged("repo_id/repo_name-mxfp4", tokenizer, token = "hf...", save_method = "mxfp4") # Merge and push to hub in 16bit if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "merged_16bit") + model.save_pretrained_merged("finetuned_model-merged", tokenizer, save_method = "merged_16bit") if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Kaggle-gpt-oss-(20B)_A100-GRPO.py b/python_scripts/Kaggle-gpt-oss-(20B)_A100-GRPO.py index 2eda416c4..669a89e78 100644 --- a/python_scripts/Kaggle-gpt-oss-(20B)_A100-GRPO.py +++ b/python_scripts/Kaggle-gpt-oss-(20B)_A100-GRPO.py @@ -339,21 +339,21 @@ def formatting_prompts_func(examples): # Merge to mxfp 4bit if False: - model.save_pretrained_merged("gpt-oss-finetune", tokenizer, save_method = "mxfp4",) + model.save_pretrained_merged("gpt-oss-finetune-mxfp4", tokenizer, save_method = "mxfp4",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "mxfp4", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-mxfp4", tokenizer, save_method = "mxfp4", token = "") # Merge and push to hub in 16bit if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("gpt-oss-finetune") - tokenizer.save_pretrained("gpt-oss-finetune") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/gpt-oss-finetune", token = "") - tokenizer.push_to_hub("hf/gpt-oss-finetune", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Kaggle-gpt_oss_(20B)_500K_Context_Fine_tuning.py b/python_scripts/Kaggle-gpt_oss_(20B)_500K_Context_Fine_tuning.py index c4ac94dcc..45b5a769b 100644 --- a/python_scripts/Kaggle-gpt_oss_(20B)_500K_Context_Fine_tuning.py +++ b/python_scripts/Kaggle-gpt_oss_(20B)_500K_Context_Fine_tuning.py @@ -244,14 +244,14 @@ class Book: # Merge and push to hub in mxfp4 4bit format if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = 
"mxfp4") -if False: model.push_to_hub_merged("repo_id/repo_name", tokenizer, token = "hf...", save_method = "mxfp4") + model.save_pretrained_merged("finetuned_model-mxfp4", tokenizer, save_method = "mxfp4") +if False: model.push_to_hub_merged("repo_id/repo_name-mxfp4", tokenizer, token = "hf...", save_method = "mxfp4") # Merge and push to hub in 16bit if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "merged_16bit") + model.save_pretrained_merged("finetuned_model-merged", tokenizer, save_method = "merged_16bit") if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Kaggle-gpt_oss_(20B)_GRPO_BF16.py b/python_scripts/Kaggle-gpt_oss_(20B)_GRPO_BF16.py index b5028fbc8..686505a99 100644 --- a/python_scripts/Kaggle-gpt_oss_(20B)_GRPO_BF16.py +++ b/python_scripts/Kaggle-gpt_oss_(20B)_GRPO_BF16.py @@ -783,14 +783,14 @@ def speed_check(completions, **kwargs): # Merge and push to hub in mxfp4 4bit format if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "mxfp4") -if False: model.push_to_hub_merged("repo_id/repo_name", tokenizer, token = "hf...", save_method = "mxfp4") + model.save_pretrained_merged("finetuned_model-mxfp4", tokenizer, save_method = "mxfp4") +if False: model.push_to_hub_merged("repo_id/repo_name-mxfp4", tokenizer, token = "hf...", save_method = "mxfp4") # Merge and push to hub in 16bit if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "merged_16bit") + model.save_pretrained_merged("finetuned_model-merged", tokenizer, save_method = "merged_16bit") if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Liquid_LFM2-Conversational.py b/python_scripts/Liquid_LFM2-Conversational.py index b51d5861d..01a2e5b3d 100644 --- a/python_scripts/Liquid_LFM2-Conversational.py +++ b/python_scripts/Liquid_LFM2-Conversational.py @@ -383,20 +383,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Liquid_LFM2_(1.2B)-Conversational.py b/python_scripts/Liquid_LFM2_(1.2B)-Conversational.py index 710e49eb3..b3ce827f9 100644 --- a/python_scripts/Liquid_LFM2_(1.2B)-Conversational.py +++ b/python_scripts/Liquid_LFM2_(1.2B)-Conversational.py @@ -388,20 +388,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if 
False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Llama3.1_(8B)-Alpaca.py b/python_scripts/Llama3.1_(8B)-Alpaca.py index 59ebb09aa..f2fdaf33e 100644 --- a/python_scripts/Llama3.1_(8B)-Alpaca.py +++ b/python_scripts/Llama3.1_(8B)-Alpaca.py @@ -318,20 +318,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, 
save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Llama3.1_(8B)-GRPO.py b/python_scripts/Llama3.1_(8B)-GRPO.py index 2768c564a..2c083fd87 100644 --- a/python_scripts/Llama3.1_(8B)-GRPO.py +++ b/python_scripts/Llama3.1_(8B)-GRPO.py @@ -326,20 +326,20 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - 
model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Llama3.2_(11B)-Vision.py b/python_scripts/Llama3.2_(11B)-Vision.py index 7a5399e64..d0d515c95 100644 --- a/python_scripts/Llama3.2_(11B)-Vision.py +++ b/python_scripts/Llama3.2_(11B)-Vision.py @@ -383,10 +383,10 @@ def convert_to_conversation(sample): # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Llama3.2_(1B)-RAFT.py b/python_scripts/Llama3.2_(1B)-RAFT.py index 33d101177..65fe1d0e7 100644 --- a/python_scripts/Llama3.2_(1B)-RAFT.py +++ b/python_scripts/Llama3.2_(1B)-RAFT.py @@ -337,20 +337,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Llama3.2_(1B_and_3B)-Conversational.py b/python_scripts/Llama3.2_(1B_and_3B)-Conversational.py index dbf473b0c..1cfc16186 100644 --- a/python_scripts/Llama3.2_(1B_and_3B)-Conversational.py +++ b/python_scripts/Llama3.2_(1B_and_3B)-Conversational.py @@ -398,20 +398,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: 
model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Llama3.3_(70B)_A100-Conversational.py b/python_scripts/Llama3.3_(70B)_A100-Conversational.py index a54f6b177..cdf8b8617 100644 --- a/python_scripts/Llama3.3_(70B)_A100-Conversational.py +++ b/python_scripts/Llama3.3_(70B)_A100-Conversational.py @@ -398,20 +398,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if 
False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Llama3_(8B)-Alpaca.py b/python_scripts/Llama3_(8B)-Alpaca.py index c6d78d8fb..b632e3cb8 100644 --- a/python_scripts/Llama3_(8B)-Alpaca.py +++ b/python_scripts/Llama3_(8B)-Alpaca.py @@ -311,20 +311,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + 
tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Llama3_(8B)-Conversational.py b/python_scripts/Llama3_(8B)-Conversational.py index c33edf846..2bd9fca18 100644 --- a/python_scripts/Llama3_(8B)-Conversational.py +++ b/python_scripts/Llama3_(8B)-Conversational.py @@ -362,20 +362,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Llama3_(8B)-ORPO.py b/python_scripts/Llama3_(8B)-ORPO.py index 9674d6ab5..d563d7dd7 100644 --- a/python_scripts/Llama3_(8B)-ORPO.py +++ 
b/python_scripts/Llama3_(8B)-ORPO.py @@ -318,20 +318,20 @@ def format_prompt(sample): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Llama_FP8_GRPO.py b/python_scripts/Llama_FP8_GRPO.py index bf6654d94..78fe9dac8 100644 --- a/python_scripts/Llama_FP8_GRPO.py +++ b/python_scripts/Llama_FP8_GRPO.py @@ -814,20 +814,20 @@ def check_numbers(prompts, completions, answer, **kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method 
= "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Llasa_TTS_(1B).py b/python_scripts/Llasa_TTS_(1B).py index 5a1a4aadb..b0ec0338d 100644 --- a/python_scripts/Llasa_TTS_(1B).py +++ b/python_scripts/Llasa_TTS_(1B).py @@ -561,20 +561,20 @@ def extract_speech_ids(speech_tokens_str): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - 
model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/Llasa_TTS_(3B).py b/python_scripts/Llasa_TTS_(3B).py index 94f266cf9..58953fea7 100644 --- a/python_scripts/Llasa_TTS_(3B).py +++ b/python_scripts/Llasa_TTS_(3B).py @@ -561,20 +561,20 @@ def extract_speech_ids(speech_tokens_str): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + 
model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/Magistral_(24B)-Reasoning-Conversational.py b/python_scripts/Magistral_(24B)-Reasoning-Conversational.py index d72a7de40..7effa202c 100644 --- a/python_scripts/Magistral_(24B)-Reasoning-Conversational.py +++ b/python_scripts/Magistral_(24B)-Reasoning-Conversational.py @@ -297,23 +297,23 @@ def generate_conversation(example): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git 
a/python_scripts/Meta-Synthetic-Data-Llama3.1_(8B).py b/python_scripts/Meta-Synthetic-Data-Llama3.1_(8B).py index af5f791f4..8d61726ab 100644 --- a/python_scripts/Meta-Synthetic-Data-Llama3.1_(8B).py +++ b/python_scripts/Meta-Synthetic-Data-Llama3.1_(8B).py @@ -644,20 +644,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Meta_Synthetic_Data_Llama3_2_(3B).py b/python_scripts/Meta_Synthetic_Data_Llama3_2_(3B).py index 675cafe05..96536536b 100644 --- a/python_scripts/Meta_Synthetic_Data_Llama3_2_(3B).py +++ b/python_scripts/Meta_Synthetic_Data_Llama3_2_(3B).py @@ -454,23 +454,23 @@ def formatting_prompts_func(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = 
"merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Change to True to upload finetune - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Change to True to upload finetune - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Change to True to upload finetune - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.py b/python_scripts/Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.py index 280643323..de092c467 100644 --- a/python_scripts/Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.py +++ b/python_scripts/Ministral_3_(3B)_Reinforcement_Learning_Sudoku_Game.py @@ -819,20 +819,20 @@ def strategy_succeeds(completions, **kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: 
model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Ministral_3_VL_(3B)_Vision.py b/python_scripts/Ministral_3_VL_(3B)_Vision.py index ff954d2f7..f2e1433b9 100644 --- a/python_scripts/Ministral_3_VL_(3B)_Vision.py +++ b/python_scripts/Ministral_3_VL_(3B)_Vision.py @@ -387,10 +387,10 @@ def convert_to_conversation(sample): # Select ONLY 1 to save! (Both not needed!) 
# Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Mistral_Nemo_(12B)-Alpaca.py b/python_scripts/Mistral_Nemo_(12B)-Alpaca.py index 6e1d70108..9d901e8a4 100644 --- a/python_scripts/Mistral_Nemo_(12B)-Alpaca.py +++ b/python_scripts/Mistral_Nemo_(12B)-Alpaca.py @@ -318,20 +318,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / 
llama.cpp Conversion diff --git a/python_scripts/Mistral_Small_(22B)-Alpaca.py b/python_scripts/Mistral_Small_(22B)-Alpaca.py index f06a03251..e28379018 100644 --- a/python_scripts/Mistral_Small_(22B)-Alpaca.py +++ b/python_scripts/Mistral_Small_(22B)-Alpaca.py @@ -318,20 +318,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Mistral_v0.3_(7B)-Alpaca.py b/python_scripts/Mistral_v0.3_(7B)-Alpaca.py index 48c82319f..3a48b4da3 100644 --- a/python_scripts/Mistral_v0.3_(7B)-Alpaca.py +++ b/python_scripts/Mistral_v0.3_(7B)-Alpaca.py @@ -311,20 +311,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: 
model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Mistral_v0.3_(7B)-CPT.py b/python_scripts/Mistral_v0.3_(7B)-CPT.py index 30d621ecf..635227073 100644 --- a/python_scripts/Mistral_v0.3_(7B)-CPT.py +++ b/python_scripts/Mistral_v0.3_(7B)-CPT.py @@ -484,20 +484,20 @@ def formatting_prompts_func(conversations): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", 
tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Mistral_v0.3_(7B)-Conversational.py b/python_scripts/Mistral_v0.3_(7B)-Conversational.py index 88468feb7..69e4a3590 100644 --- a/python_scripts/Mistral_v0.3_(7B)-Conversational.py +++ b/python_scripts/Mistral_v0.3_(7B)-Conversational.py @@ -369,20 +369,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + 
tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Mistral_v0.3_(7B)-GRPO.py b/python_scripts/Mistral_v0.3_(7B)-GRPO.py index ffc8fcfc2..75b697872 100644 --- a/python_scripts/Mistral_v0.3_(7B)-GRPO.py +++ b/python_scripts/Mistral_v0.3_(7B)-GRPO.py @@ -326,20 +326,20 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Nemotron-3-Nano-30B-A3B_A100.py b/python_scripts/Nemotron-3-Nano-30B-A3B_A100.py index 527517401..fa3e96268 100644 --- 
a/python_scripts/Nemotron-3-Nano-30B-A3B_A100.py +++ b/python_scripts/Nemotron-3-Nano-30B-A3B_A100.py @@ -314,23 +314,23 @@ def formatting_prompts_func(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Nemotron-Nano-3-30B-A3B_A100.py b/python_scripts/Nemotron-Nano-3-30B-A3B_A100.py index 527517401..fa3e96268 100644 --- a/python_scripts/Nemotron-Nano-3-30B-A3B_A100.py +++ b/python_scripts/Nemotron-Nano-3-30B-A3B_A100.py @@ -314,23 +314,23 @@ def formatting_prompts_func(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - 
model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game.py b/python_scripts/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game.py index 6596c040f..877e8a56f 100644 --- a/python_scripts/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game.py +++ b/python_scripts/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game.py @@ -713,15 +713,15 @@ def strategy_succeeds(completions, **kwargs): # Merge and push to hub in mxfp4 4bit format if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "mxfp4") + model.save_pretrained_merged("finetuned_model-mxfp4", tokenizer, save_method = "mxfp4") if False: - model.push_to_hub_merged("repo_id/repo_name", tokenizer, token = "hf...", save_method = "mxfp4") + model.push_to_hub_merged("repo_id/repo_name-mxfp4", tokenizer, token = "hf...", save_method = "mxfp4") # Merge and push to hub in 16bit if False: - 
model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "merged_16bit") + model.save_pretrained_merged("finetuned_model-merged", tokenizer, save_method = "merged_16bit") if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # # And we're done! diff --git a/python_scripts/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.py b/python_scripts/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.py index 3583cb15a..f21c2d055 100644 --- a/python_scripts/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.py +++ b/python_scripts/OpenEnv_gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.py @@ -710,15 +710,15 @@ def strategy_succeeds(completions, **kwargs): # Merge and push to hub in mxfp4 4bit format if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "mxfp4") + model.save_pretrained_merged("finetuned_model-mxfp4", tokenizer, save_method = "mxfp4") if False: - model.push_to_hub_merged("repo_id/repo_name", tokenizer, token = "hf...", save_method = "mxfp4") + model.push_to_hub_merged("repo_id/repo_name-mxfp4", tokenizer, token = "hf...", save_method = "mxfp4") # Merge and push to hub in 16bit if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "merged_16bit") + model.save_pretrained_merged("finetuned_model-merged", tokenizer, save_method = "merged_16bit") if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # # And we're done! 
diff --git a/python_scripts/Orpheus_(3B)-TTS.py b/python_scripts/Orpheus_(3B)-TTS.py index dd59d349e..d1ba67a2c 100644 --- a/python_scripts/Orpheus_(3B)-TTS.py +++ b/python_scripts/Orpheus_(3B)-TTS.py @@ -471,20 +471,20 @@ def redistribute_codes(code_list): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Oute_TTS_(1B).py b/python_scripts/Oute_TTS_(1B).py index 588b5620d..d9afec827 100644 --- a/python_scripts/Oute_TTS_(1B).py +++ b/python_scripts/Oute_TTS_(1B).py @@ -527,20 +527,20 @@ def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> to # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Paddle_OCR_(1B)_Vision.py b/python_scripts/Paddle_OCR_(1B)_Vision.py index e87649536..eecc14a6b 100644 --- a/python_scripts/Paddle_OCR_(1B)_Vision.py +++ b/python_scripts/Paddle_OCR_(1B)_Vision.py @@ -401,10 +401,10 @@ def convert_to_conversation(sample): # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Phi_3.5_Mini-Conversational.py b/python_scripts/Phi_3.5_Mini-Conversational.py index 1664d6bc8..e7131a8fc 100644 --- a/python_scripts/Phi_3.5_Mini-Conversational.py +++ b/python_scripts/Phi_3.5_Mini-Conversational.py @@ -370,20 +370,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Phi_3_Medium-Conversational.py b/python_scripts/Phi_3_Medium-Conversational.py index 989406cec..93f61c2a8 100644 --- a/python_scripts/Phi_3_Medium-Conversational.py +++ b/python_scripts/Phi_3_Medium-Conversational.py @@ -365,20 +365,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: 
model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Phi_4-Conversational.py b/python_scripts/Phi_4-Conversational.py index 4f74dfb75..ddfabc618 100644 --- a/python_scripts/Phi_4-Conversational.py +++ b/python_scripts/Phi_4-Conversational.py @@ -399,20 +399,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, 
save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Phi_4_(14B)-GRPO.py b/python_scripts/Phi_4_(14B)-GRPO.py index 788f61daf..015795612 100644 --- a/python_scripts/Phi_4_(14B)-GRPO.py +++ b/python_scripts/Phi_4_(14B)-GRPO.py @@ -329,20 +329,20 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - 
model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Pixtral_(12B)-Vision.py b/python_scripts/Pixtral_(12B)-Vision.py index 37e6edd85..2eb9af656 100644 --- a/python_scripts/Pixtral_(12B)-Vision.py +++ b/python_scripts/Pixtral_(12B)-Vision.py @@ -313,10 +313,10 @@ # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Qwen2.5_(3B)-GRPO.py b/python_scripts/Qwen2.5_(3B)-GRPO.py index 3395ef301..c0d09c522 100644 --- a/python_scripts/Qwen2.5_(3B)-GRPO.py +++ b/python_scripts/Qwen2.5_(3B)-GRPO.py @@ -325,20 +325,20 @@ def xmlcount_reward_func(completions, **kwargs) -> list[float]: # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Qwen2.5_(7B)-Alpaca.py b/python_scripts/Qwen2.5_(7B)-Alpaca.py index 7b2d87f67..40860fc88 100644 --- a/python_scripts/Qwen2.5_(7B)-Alpaca.py +++ b/python_scripts/Qwen2.5_(7B)-Alpaca.py @@ -322,20 +322,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = 
"merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Qwen2.5_Coder_(14B)-Conversational.py b/python_scripts/Qwen2.5_Coder_(14B)-Conversational.py index 3fbbf29e4..70bc9f898 100644 --- a/python_scripts/Qwen2.5_Coder_(14B)-Conversational.py +++ b/python_scripts/Qwen2.5_Coder_(14B)-Conversational.py @@ -403,20 +403,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method 
= "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Qwen2.5_VL_(7B)-Vision.py b/python_scripts/Qwen2.5_VL_(7B)-Vision.py index f6916875e..77c7234da 100644 --- a/python_scripts/Qwen2.5_VL_(7B)-Vision.py +++ b/python_scripts/Qwen2.5_VL_(7B)-Vision.py @@ -390,10 +390,10 @@ def convert_to_conversation(sample): # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/Qwen2_(7B)-Alpaca.py b/python_scripts/Qwen2_(7B)-Alpaca.py index 5fc1b9b4e..a0fda1327 100644 --- a/python_scripts/Qwen2_(7B)-Alpaca.py +++ b/python_scripts/Qwen2_(7B)-Alpaca.py @@ -319,20 +319,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Qwen2_5_7B_VL_GRPO.py b/python_scripts/Qwen2_5_7B_VL_GRPO.py index 8a94d47b5..98c97c861 100644 --- a/python_scripts/Qwen2_5_7B_VL_GRPO.py +++ b/python_scripts/Qwen2_5_7B_VL_GRPO.py @@ -433,20 +433,20 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, 
save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Qwen2_VL_(7B)-Vision.py b/python_scripts/Qwen2_VL_(7B)-Vision.py index c2f0e2dc9..68ce30bf2 100644 --- a/python_scripts/Qwen2_VL_(7B)-Vision.py +++ b/python_scripts/Qwen2_VL_(7B)-Vision.py @@ -390,10 +390,10 @@ def convert_to_conversation(sample): # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! 
If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/Qwen3_(14B)-Alpaca.py b/python_scripts/Qwen3_(14B)-Alpaca.py index c9f347be6..15b7eb977 100644 --- a/python_scripts/Qwen3_(14B)-Alpaca.py +++ b/python_scripts/Qwen3_(14B)-Alpaca.py @@ -320,20 +320,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Qwen3_(14B)-Reasoning-Conversational.py b/python_scripts/Qwen3_(14B)-Reasoning-Conversational.py index 08c757ecb..064b3174b 100644 --- a/python_scripts/Qwen3_(14B)-Reasoning-Conversational.py +++ b/python_scripts/Qwen3_(14B)-Reasoning-Conversational.py @@ -386,23 +386,23 @@ def generate_conversation(examples): 
# Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Qwen3_(14B).py b/python_scripts/Qwen3_(14B).py index c886a2ee8..1b6632cff 100644 --- a/python_scripts/Qwen3_(14B).py +++ b/python_scripts/Qwen3_(14B).py @@ -383,23 +383,23 @@ def generate_conversation(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - 
model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Qwen3_(32B)_A100-Reasoning-Conversational.py b/python_scripts/Qwen3_(32B)_A100-Reasoning-Conversational.py index 84c22fc78..b0d242e6c 100644 --- a/python_scripts/Qwen3_(32B)_A100-Reasoning-Conversational.py +++ b/python_scripts/Qwen3_(32B)_A100-Reasoning-Conversational.py @@ -386,23 +386,23 @@ def generate_conversation(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, 
save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Qwen3_(4B)-GRPO.py b/python_scripts/Qwen3_(4B)-GRPO.py index ee548169a..6e750becb 100644 --- a/python_scripts/Qwen3_(4B)-GRPO.py +++ b/python_scripts/Qwen3_(4B)-GRPO.py @@ -741,20 +741,20 @@ def check_numbers(prompts, completions, answer, **kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / 
llama.cpp Conversion diff --git a/python_scripts/Qwen3_(4B)-Instruct.py b/python_scripts/Qwen3_(4B)-Instruct.py index 9530304f8..acc0ad0ae 100644 --- a/python_scripts/Qwen3_(4B)-Instruct.py +++ b/python_scripts/Qwen3_(4B)-Instruct.py @@ -321,23 +321,23 @@ def formatting_prompts_func(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Qwen3_(4B)-Thinking.py b/python_scripts/Qwen3_(4B)-Thinking.py index d4cc68784..3b8191885 100644 --- a/python_scripts/Qwen3_(4B)-Thinking.py +++ b/python_scripts/Qwen3_(4B)-Thinking.py @@ -324,23 +324,23 @@ def formatting_prompts_func(examples): # Merge to 16bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) + 
model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit if False: - model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) + model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") + model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Qwen3_8B_FP8_GRPO.py b/python_scripts/Qwen3_8B_FP8_GRPO.py index 575b7b362..8ee0f4499 100644 --- a/python_scripts/Qwen3_8B_FP8_GRPO.py +++ b/python_scripts/Qwen3_8B_FP8_GRPO.py @@ -815,20 +815,20 @@ def check_numbers(prompts, completions, answer, **kwargs): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: 
model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Qwen3_VL_(8B)-Vision-GRPO.py b/python_scripts/Qwen3_VL_(8B)-Vision-GRPO.py index 7f6d74bc5..e05476800 100644 --- a/python_scripts/Qwen3_VL_(8B)-Vision-GRPO.py +++ b/python_scripts/Qwen3_VL_(8B)-Vision-GRPO.py @@ -425,20 +425,20 @@ def correctness_reward_func(prompts, completions, answer, **kwargs) -> list[floa # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + 
model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Qwen3_VL_(8B)-Vision.py b/python_scripts/Qwen3_VL_(8B)-Vision.py index c2bc5671c..183955369 100644 --- a/python_scripts/Qwen3_VL_(8B)-Vision.py +++ b/python_scripts/Qwen3_VL_(8B)-Vision.py @@ -390,10 +390,10 @@ def convert_to_conversation(sample): # Select ONLY 1 to save! (Both not needed!) # Save locally to 16bit -if False: model.save_pretrained_merged("unsloth_finetune", tokenizer,) +if False: model.save_pretrained_merged("unsloth_finetune-merged", tokenizer,) # To export and save to your Hugging Face account -if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune", tokenizer, token = "PUT_HERE") +if False: model.push_to_hub_merged("YOUR_USERNAME/unsloth_finetune-merged", tokenizer, token = "PUT_HERE") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Sesame_CSM_(1B)-TTS.py b/python_scripts/Sesame_CSM_(1B)-TTS.py index fc30ac9e5..9392adaa8 100644 --- a/python_scripts/Sesame_CSM_(1B)-TTS.py +++ b/python_scripts/Sesame_CSM_(1B)-TTS.py @@ -367,19 +367,19 @@ def preprocess_example(example): # Merge to 16bit -if False: model.save_pretrained_merged("model", processor, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", processor, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", processor, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", processor, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", processor, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", processor, save_method = "merged_4bit", token = "") +if 
False: model.save_pretrained_merged("model-merged-4bit", processor, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", processor, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") + model.save_pretrained("lora_model") processor.save_pretrained("model") if False: - model.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") processor.push_to_hub("hf/model", token = "") diff --git a/python_scripts/Spark_TTS_(0_5B).py b/python_scripts/Spark_TTS_(0_5B).py index fad45ce3c..905a47c38 100644 --- a/python_scripts/Spark_TTS_(0_5B).py +++ b/python_scripts/Spark_TTS_(0_5B).py @@ -460,20 +460,20 @@ def generate_speech_from_text( # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # And we're done! 
If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! diff --git a/python_scripts/TinyLlama_(1.1B)-Alpaca.py b/python_scripts/TinyLlama_(1.1B)-Alpaca.py index 3bca16bf8..fb69e9402 100644 --- a/python_scripts/TinyLlama_(1.1B)-Alpaca.py +++ b/python_scripts/TinyLlama_(1.1B)-Alpaca.py @@ -316,20 +316,20 @@ def formatting_prompts_func(examples): # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_16bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = "merged_16bit",) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/Whisper.py b/python_scripts/Whisper.py index b941ecab6..5e308fd81 100644 --- a/python_scripts/Whisper.py +++ b/python_scripts/Whisper.py @@ -317,20 +317,20 @@ def __call__(self, features: 
List[Dict[str, Union[List[int], torch.Tensor]]]) -> # Merge to 16bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = None,) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_16bit", token = "") +if False: model.save_pretrained_merged("model-merged", tokenizer, save_method = None,) +if False: model.push_to_hub_merged("hf/model-merged", tokenizer, save_method = "merged_16bit", token = "") # Merge to 4bit -if False: model.save_pretrained_merged("model", tokenizer, save_method = "merged_4bit",) -if False: model.push_to_hub_merged("hf/model", tokenizer, save_method = "merged_4bit", token = "") +if False: model.save_pretrained_merged("model-merged-4bit", tokenizer, save_method = "merged_4bit",) +if False: model.push_to_hub_merged("hf/model-merged-4bit", tokenizer, save_method = "merged_4bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("model") - tokenizer.save_pretrained("model") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: - model.push_to_hub("hf/model", token = "") - tokenizer.push_to_hub("hf/model", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/gpt-oss-(120B)_A100-Fine-tuning.py b/python_scripts/gpt-oss-(120B)_A100-Fine-tuning.py index ab94e7a4c..d9244d25f 100644 --- a/python_scripts/gpt-oss-(120B)_A100-Fine-tuning.py +++ b/python_scripts/gpt-oss-(120B)_A100-Fine-tuning.py @@ -339,21 +339,21 @@ def formatting_prompts_func(examples): # Merge to mxfp 4bit if False: - model.save_pretrained_merged("gpt-oss-finetune", tokenizer, save_method = "mxfp4",) + model.save_pretrained_merged("gpt-oss-finetune-mxfp4", tokenizer, save_method = "mxfp4",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "mxfp4", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-mxfp4", tokenizer, save_method = "mxfp4", token = "") # Merge and push to hub in 16bit if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("gpt-oss-finetune") - tokenizer.save_pretrained("gpt-oss-finetune") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/gpt-oss-finetune", token = "") - tokenizer.push_to_hub("hf/gpt-oss-finetune", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/gpt-oss-(20B)-Fine-tuning.py b/python_scripts/gpt-oss-(20B)-Fine-tuning.py index e292d4aff..8a83aae8b 100644 --- a/python_scripts/gpt-oss-(20B)-Fine-tuning.py +++ b/python_scripts/gpt-oss-(20B)-Fine-tuning.py @@ -368,14 +368,14 @@ def formatting_prompts_func(examples): # Merge and push to hub in mxfp4 4bit format if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "mxfp4") -if False: 
model.push_to_hub_merged("repo_id/repo_name", tokenizer, token = "hf...", save_method = "mxfp4") + model.save_pretrained_merged("finetuned_model-mxfp4", tokenizer, save_method = "mxfp4") +if False: model.push_to_hub_merged("repo_id/repo_name-mxfp4", tokenizer, token = "hf...", save_method = "mxfp4") # Merge and push to hub in 16bit if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "merged_16bit") + model.save_pretrained_merged("finetuned_model-merged", tokenizer, save_method = "merged_16bit") if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/gpt-oss-(20B)-GRPO.py b/python_scripts/gpt-oss-(20B)-GRPO.py index a67190a29..de0fe197c 100644 --- a/python_scripts/gpt-oss-(20B)-GRPO.py +++ b/python_scripts/gpt-oss-(20B)-GRPO.py @@ -785,14 +785,14 @@ def speed_check(completions, **kwargs): # Merge and push to hub in mxfp4 4bit format if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "mxfp4") -if False: model.push_to_hub_merged("repo_id/repo_name", tokenizer, token = "hf...", save_method = "mxfp4") + model.save_pretrained_merged("finetuned_model-mxfp4", tokenizer, save_method = "mxfp4") +if False: model.push_to_hub_merged("repo_id/repo_name-mxfp4", tokenizer, token = "hf...", save_method = "mxfp4") # Merge and push to hub in 16bit if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "merged_16bit") + model.save_pretrained_merged("finetuned_model-merged", tokenizer, save_method = "merged_16bit") if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/gpt-oss-(20B)_A100-GRPO.py b/python_scripts/gpt-oss-(20B)_A100-GRPO.py index 2eda416c4..669a89e78 100644 --- a/python_scripts/gpt-oss-(20B)_A100-GRPO.py +++ b/python_scripts/gpt-oss-(20B)_A100-GRPO.py @@ -339,21 +339,21 @@ def formatting_prompts_func(examples): # Merge to mxfp 4bit if False: - model.save_pretrained_merged("gpt-oss-finetune", tokenizer, save_method = "mxfp4",) + model.save_pretrained_merged("gpt-oss-finetune-mxfp4", tokenizer, save_method = "mxfp4",) if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "mxfp4", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-mxfp4", tokenizer, save_method = "mxfp4", token = "") # Merge and push to hub in 16bit if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # Just LoRA adapters if False: - model.save_pretrained("gpt-oss-finetune") - tokenizer.save_pretrained("gpt-oss-finetune") + model.save_pretrained("lora_model") + tokenizer.save_pretrained("lora_model") if False: # Pushing to HF Hub - model.push_to_hub("hf/gpt-oss-finetune", token = "") - tokenizer.push_to_hub("hf/gpt-oss-finetune", token = "") + model.push_to_hub("hf/lora_model", token = "") + tokenizer.push_to_hub("hf/lora_model", token = "") # ### GGUF / llama.cpp Conversion diff --git a/python_scripts/gpt_oss_(20B)_500K_Context_Fine_tuning.py b/python_scripts/gpt_oss_(20B)_500K_Context_Fine_tuning.py index 34faca5ce..7c856750c 100644 --- a/python_scripts/gpt_oss_(20B)_500K_Context_Fine_tuning.py +++ b/python_scripts/gpt_oss_(20B)_500K_Context_Fine_tuning.py @@ -246,14 +246,14 @@ class Book: # Merge and push to hub in mxfp4 4bit format if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "mxfp4") -if False: 
model.push_to_hub_merged("repo_id/repo_name", tokenizer, token = "hf...", save_method = "mxfp4") + model.save_pretrained_merged("finetuned_model-mxfp4", tokenizer, save_method = "mxfp4") +if False: model.push_to_hub_merged("repo_id/repo_name-mxfp4", tokenizer, token = "hf...", save_method = "mxfp4") # Merge and push to hub in 16bit if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "merged_16bit") + model.save_pretrained_merged("finetuned_model-merged", tokenizer, save_method = "merged_16bit") if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/gpt_oss_(20B)_GRPO_BF16.py b/python_scripts/gpt_oss_(20B)_GRPO_BF16.py index b5028fbc8..686505a99 100644 --- a/python_scripts/gpt_oss_(20B)_GRPO_BF16.py +++ b/python_scripts/gpt_oss_(20B)_GRPO_BF16.py @@ -783,14 +783,14 @@ def speed_check(completions, **kwargs): # Merge and push to hub in mxfp4 4bit format if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "mxfp4") -if False: model.push_to_hub_merged("repo_id/repo_name", tokenizer, token = "hf...", save_method = "mxfp4") + model.save_pretrained_merged("finetuned_model-mxfp4", tokenizer, save_method = "mxfp4") +if False: model.push_to_hub_merged("repo_id/repo_name-mxfp4", tokenizer, token = "hf...", save_method = "mxfp4") # Merge and push to hub in 16bit if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "merged_16bit") + model.save_pretrained_merged("finetuned_model-merged", tokenizer, save_method = "merged_16bit") if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # And we're done! If you have any questions on Unsloth, we have a [Discord](https://discord.gg/unsloth) channel! If you find any bugs or want to keep updated with the latest LLM stuff, or need help, join projects etc, feel free to join our Discord! 
diff --git a/python_scripts/gpt_oss_(20B)_Reinforcement_Learning_2048_Game.py b/python_scripts/gpt_oss_(20B)_Reinforcement_Learning_2048_Game.py index a677b1c64..bfec718c7 100644 --- a/python_scripts/gpt_oss_(20B)_Reinforcement_Learning_2048_Game.py +++ b/python_scripts/gpt_oss_(20B)_Reinforcement_Learning_2048_Game.py @@ -781,15 +781,15 @@ def strategy_succeeds(completions, **kwargs): # Merge and push to hub in mxfp4 4bit format if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "mxfp4") + model.save_pretrained_merged("finetuned_model-mxfp4", tokenizer, save_method = "mxfp4") if False: - model.push_to_hub_merged("repo_id/repo_name", tokenizer, token = "hf...", save_method = "mxfp4") + model.push_to_hub_merged("repo_id/repo_name-mxfp4", tokenizer, token = "hf...", save_method = "mxfp4") # Merge and push to hub in 16bit if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "merged_16bit") + model.save_pretrained_merged("finetuned_model-merged", tokenizer, save_method = "merged_16bit") if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # # And we're done! 
diff --git a/python_scripts/gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.py b/python_scripts/gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.py index ec575d25f..30b1f796e 100644 --- a/python_scripts/gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.py +++ b/python_scripts/gpt_oss_(20B)_Reinforcement_Learning_2048_Game_BF16.py @@ -777,15 +777,15 @@ def strategy_succeeds(completions, **kwargs): # Merge and push to hub in mxfp4 4bit format if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "mxfp4") + model.save_pretrained_merged("finetuned_model-mxfp4", tokenizer, save_method = "mxfp4") if False: - model.push_to_hub_merged("repo_id/repo_name", tokenizer, token = "hf...", save_method = "mxfp4") + model.push_to_hub_merged("repo_id/repo_name-mxfp4", tokenizer, token = "hf...", save_method = "mxfp4") # Merge and push to hub in 16bit if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "merged_16bit") + model.save_pretrained_merged("finetuned_model-merged", tokenizer, save_method = "merged_16bit") if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # # And we're done! 
diff --git a/python_scripts/gpt_oss_(20B)_Reinforcement_Learning_2048_Game_DGX_Spark.py b/python_scripts/gpt_oss_(20B)_Reinforcement_Learning_2048_Game_DGX_Spark.py index a677b1c64..bfec718c7 100644 --- a/python_scripts/gpt_oss_(20B)_Reinforcement_Learning_2048_Game_DGX_Spark.py +++ b/python_scripts/gpt_oss_(20B)_Reinforcement_Learning_2048_Game_DGX_Spark.py @@ -781,15 +781,15 @@ def strategy_succeeds(completions, **kwargs): # Merge and push to hub in mxfp4 4bit format if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "mxfp4") + model.save_pretrained_merged("finetuned_model-mxfp4", tokenizer, save_method = "mxfp4") if False: - model.push_to_hub_merged("repo_id/repo_name", tokenizer, token = "hf...", save_method = "mxfp4") + model.push_to_hub_merged("repo_id/repo_name-mxfp4", tokenizer, token = "hf...", save_method = "mxfp4") # Merge and push to hub in 16bit if False: - model.save_pretrained_merged("finetuned_model", tokenizer, save_method = "merged_16bit") + model.save_pretrained_merged("finetuned_model-merged", tokenizer, save_method = "merged_16bit") if False: # Pushing to HF Hub - model.push_to_hub_merged("hf/gpt-oss-finetune", tokenizer, save_method = "merged_16bit", token = "") + model.push_to_hub_merged("hf/gpt-oss-finetune-merged", tokenizer, save_method = "merged_16bit", token = "") # # And we're done!