From 4b3510436bc86caaafb054c227c1721684ad60c3 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Fri, 12 Jun 2026 13:57:57 +0000 Subject: [PATCH] Whisper: pass processing_class instead of removed tokenizer kwarg transformers 5.x removed the deprecated tokenizer argument from Seq2SeqTrainer (4.x already warned: use processing_class instead), so the notebook dies at trainer construction on current installs. processing_class accepts the feature extractor on 4.57.6 and 5.x alike. Applied in original_template and synced to the generated nb, kaggle and python_scripts copies; a full regeneration was avoided on purpose since it rewrites unrelated notebooks. --- nb/Kaggle-Whisper.ipynb | 2 +- nb/Whisper.ipynb | 2 +- original_template/Whisper.ipynb | 2 +- python_scripts/Kaggle-Whisper.py | 2 +- python_scripts/Whisper.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/nb/Kaggle-Whisper.ipynb b/nb/Kaggle-Whisper.ipynb index 63ae92e4..7cb92220 100644 --- a/nb/Kaggle-Whisper.ipynb +++ b/nb/Kaggle-Whisper.ipynb @@ -675,7 +675,7 @@ " train_dataset = train_dataset,\n", " data_collator = DataCollatorSpeechSeq2SeqWithPadding(processor = tokenizer),\n", " eval_dataset = test_dataset,\n", - " tokenizer = tokenizer.feature_extractor,\n", + " processing_class = tokenizer.feature_extractor,\n", " compute_metrics = compute_metrics,\n", " args = Seq2SeqTrainingArguments(\n", " # predict_with_generate = True,\n", diff --git a/nb/Whisper.ipynb b/nb/Whisper.ipynb index ee0b26c4..a6bc5453 100644 --- a/nb/Whisper.ipynb +++ b/nb/Whisper.ipynb @@ -675,7 +675,7 @@ " train_dataset = train_dataset,\n", " data_collator = DataCollatorSpeechSeq2SeqWithPadding(processor = tokenizer),\n", " eval_dataset = test_dataset,\n", - " tokenizer = tokenizer.feature_extractor,\n", + " processing_class = tokenizer.feature_extractor,\n", " compute_metrics = compute_metrics,\n", " args = Seq2SeqTrainingArguments(\n", " # predict_with_generate = True,\n", diff --git a/original_template/Whisper.ipynb b/original_template/Whisper.ipynb index 4813d71f..58398526 100644 --- a/original_template/Whisper.ipynb +++ b/original_template/Whisper.ipynb @@ -682,7 +682,7 @@ " train_dataset = train_dataset,\n", " data_collator = DataCollatorSpeechSeq2SeqWithPadding(processor=tokenizer),\n", " eval_dataset = test_dataset,\n", - " tokenizer = tokenizer.feature_extractor,\n", + " processing_class = tokenizer.feature_extractor,\n", " compute_metrics=compute_metrics,\n", " args = Seq2SeqTrainingArguments(\n", " # predict_with_generate=True,\n", diff --git a/python_scripts/Kaggle-Whisper.py b/python_scripts/Kaggle-Whisper.py index 577d2119..a1157a74 100644 --- a/python_scripts/Kaggle-Whisper.py +++ b/python_scripts/Kaggle-Whisper.py @@ -195,7 +195,7 @@ def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> train_dataset = train_dataset, data_collator = DataCollatorSpeechSeq2SeqWithPadding(processor = tokenizer), eval_dataset = test_dataset, - tokenizer = tokenizer.feature_extractor, + processing_class = tokenizer.feature_extractor, compute_metrics = compute_metrics, args = Seq2SeqTrainingArguments( # predict_with_generate = True, diff --git a/python_scripts/Whisper.py b/python_scripts/Whisper.py index 9a2e6519..63dbdb98 100644 --- a/python_scripts/Whisper.py +++ b/python_scripts/Whisper.py @@ -195,7 +195,7 @@ def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> train_dataset = train_dataset, data_collator = DataCollatorSpeechSeq2SeqWithPadding(processor = tokenizer), eval_dataset = test_dataset, - tokenizer = tokenizer.feature_extractor, + processing_class = tokenizer.feature_extractor, compute_metrics = compute_metrics, args = Seq2SeqTrainingArguments( # predict_with_generate = True,